diff --git a/Makefile b/Makefile index 57de16e..52b5614 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ range: py src/rangequeries.py output/rtree.txt data/ranges.txt knn: - py src/knnqueries.py output/rtree.txt data/knn.txt 10 + py src/knnqueries.py output/rtree.txt data/knn.txt $(k) .PHONY: clean clean: diff --git a/src/bulkloading.py b/src/bulkloading.py index 2d806cb..7c419ea 100644 --- a/src/bulkloading.py +++ b/src/bulkloading.py @@ -1,74 +1,31 @@ -from os import write -from os import path, makedirs import sys import csv +from mbr import MBR +from os import path, makedirs if not path.exists('output'): makedirs('output') -_DIVISORS = [180.0 / 2 ** n for n in range(32)] - -class MBR: - def __init__(self, id, xlow, xhigh, ylow, yhigh): - self.xlow = xlow - self.xhigh = xhigh - self.ylow = ylow - self.yhigh = yhigh - self.id = id - self.zcurve = self.findZcurve() - - def findZcurve(self): - x_median = (self.xlow + self.xhigh) / 2 - y_median = (self.ylow + self.yhigh) / 2 - return interleave_latlng(y_median, x_median) - -def interleave_latlng(lat, lng): - if lng > 180: - x = (lng % 180) + 180.0 - elif lng < -180: - x = (-((-lng) % 180)) + 180.0 - else: - x = lng + 180.0 - if lat > 90: - y = (lat % 90) + 90.0 - elif lat < -90: - y = (-((-lat) % 90)) + 90.0 - else: - y = lat + 90.0 - - morton_code = "" - for dx in _DIVISORS: - digit = 0 - if y >= dx: - digit |= 2 - y -= dx - if x >= dx: - digit |= 1 - x -= dx - morton_code += str(digit) - - return morton_code - # Given a set of coordinates find the MBR -def findMBR(objId, points): - x_min = min(points, key=lambda p: p[0])[0] - x_max = max(points, key=lambda p: p[0])[0] - y_min = min(points, key=lambda p: p[1])[1] - y_max = max(points, key=lambda p: p[1])[1] +def find_mbr(obj_id, points): + xlow = min(points, key=lambda p: p[0])[0] + xhigh = max(points, key=lambda p: p[0])[0] + ylow = min(points, key=lambda p: p[1])[1] + yhigh = max(points, key=lambda p: p[1])[1] - return MBR(objId, x_min, x_max, y_min, y_max) + 
return MBR(obj_id, xlow, xhigh, ylow, yhigh) -# Given a set of MBRs find the MBR -def createMBR(nodeId, mbrs): - x_min = min(mbr.xlow for mbr in mbrs) - x_max = max(mbr.xhigh for mbr in mbrs) - y_min = min(mbr.ylow for mbr in mbrs) - y_max = max(mbr.yhigh for mbr in mbrs) +# Given a set of MBRs find their MBR +def create_mbr(node_id, mbrs): + xlow = min(mbr.xlow for mbr in mbrs) + xhigh = max(mbr.xhigh for mbr in mbrs) + ylow = min(mbr.ylow for mbr in mbrs) + yhigh = max(mbr.yhigh for mbr in mbrs) - return MBR(nodeId, x_min, x_max, y_min, y_max) + return MBR(node_id, xlow, xhigh, ylow, yhigh) # Read data from input files -def inputReader(filename1, filename2): +def input_reader(filename1, filename2): set_of_points = [] with open(filename1, 'r') as offsets, open(filename2, 'r') as coords: reader = csv.reader(offsets, delimiter=',', quoting=csv.QUOTE_NONE) @@ -81,26 +38,26 @@ def inputReader(filename1, filename2): return set_of_points # Write data into output file -def outputWriter(filename, rTree): +def output_writer(filename, RTREE): with open(filename, 'w+') as rtree: - numNode = 0 + node_num = 0 leaves = 0 - for level in rTree: + for level in RTREE: for node in level: - rtree.write("[{}, {}, [[".format(leaves, numNode)) + rtree.write("[{}, {}, [[".format(leaves, node_num)) for i in range(len(node) - 1): rtree.write("{}, [{}, {}, {}, {}]],[".format( node[i].id, node[i].xlow, node[i].xhigh, node[i].ylow, node[i].yhigh)) rtree.write("{}, [{}, {}, {}, {}]]]]".format( node[-1].id, node[-1].xlow, node[-1].xhigh, node[-1].ylow, node[-1].yhigh)) rtree.write("\n") - numNode += 1 + node_num += 1 leaves = 1 -# -def makeRtree(collection): +# Bulk load R-Tree +def construct(collection): level = [] # list of levels(of nodes) of nodes(of mbrs) of mbrs(of points) - nodeId = 0 + node_id = 0 rank = 0 # Until we reach the root node while len(collection) > 1: @@ -117,8 +74,8 @@ def makeRtree(collection): # Make the new MBRs based on the corners of each 20-piece collection = [] for 
node in nodes: #for every 20 MBRs in 500 MBRs - collection.append(createMBR(nodeId, node)) - nodeId += 1 + collection.append(create_mbr(node_id, node)) + node_id += 1 level.append(nodes) print("{} nodes at level {}".format(len(nodes), rank)) @@ -128,16 +85,16 @@ def makeRtree(collection): if __name__ == '__main__': # Read the coordinates of the polygons points - set_of_points = inputReader(filename1=sys.argv[1], filename2=sys.argv[2]) + set_of_points = input_reader(filename1=sys.argv[1], filename2=sys.argv[2]) # Create the mbrs of the polygons mbrs = [] for sp in set_of_points: - mbrs.append(findMBR(sp[0], sp[1])) + mbrs.append(find_mbr(sp[0], sp[1])) # Sort the mbrs based on the Z curve mbrs.sort(key=lambda ld: ld.zcurve) # Make the Rtree - rTree = makeRtree(mbrs) - outputWriter("output/rtree.txt", rTree) + rTree = construct(mbrs) + output_writer("output/rtree.txt", rTree) diff --git a/src/knnqueries.py b/src/knnqueries.py index a86e2ef..704028b 100644 --- a/src/knnqueries.py +++ b/src/knnqueries.py @@ -1,57 +1,10 @@ import sys import ast +from mbr import MBR, Node, Point import heapq as hq -from math import sqrt -class Point: - def __init__(self, x, y): - self.x = x - self.y = y - -class MBR: - def __init__(self, id, xlow, xhigh, ylow, yhigh): - self.xlow = xlow - self.xhigh = xhigh - self.ylow = ylow - self.yhigh = yhigh - self.id = id - self.obj = False - - def setDistance(self, qp): - self.distance = self.findDistance(qp) - - def findDistance(self, qp): - dx = dy = 0 - if qp.x < self.xlow: - dx = self.xlow - qp.x - elif qp.x > self.xhigh: - dx = qp.x - self.xhigh - else: - dx = 0 - - if qp.y < self.ylow: - dy = self.ylow - qp.y - elif qp.y > self.yhigh: - dy = qp.y - self.yhigh - else: - dy = 0 - return sqrt(dx**2 + dy**2) - - def makeObject(self): - self.obj = True - - def __lt__(self, other): - if self.distance < other.distance: - return True - return False - -class Node: - def __init__(self, leaf, id, list_of_mbrs): - self.leaf = leaf - self.id = id - 
self.list_of_mbrs = list_of_mbrs - -def parseRtree(filename): +# Read R-Tree from data +def parse_tree(filename): with open(filename, 'r') as f: nodes = [] for line in f: @@ -64,17 +17,17 @@ def parseRtree(filename): return nodes -# Read and execute all the NN queries assigned to the r tree -def parseQuery(rtree, filename, k): +# Read and execute all the NN queries +def parse_query(rtree, filename, k): with open(filename, 'r') as f: num = 0 for line in f: [x, y] = line.split() - findKNN(rtree, Point(float(x), float(y)), k, num) + find_knn(rtree, Point(float(x), float(y)), k, num) num += 1 -# Search for the neareset k neighboors -def findKNN(rtree, qp, k, num): +# Search and print the nearest k neighbors +def find_knn(rtree, qp, k, num): # List to put our k nn objects results = [] # Initialize a priority queue @@ -112,5 +65,5 @@ def findKNN(rtree, qp, k, num): print(nn.id) if __name__ == '__main__': - rtree = parseRtree(sys.argv[1]) - parseQuery(rtree, sys.argv[2], int(sys.argv[3])) + rtree = parse_tree(sys.argv[1]) + parse_query(rtree, sys.argv[2], int(sys.argv[3])) diff --git a/src/mbr.py b/src/mbr.py new file mode 100644 index 0000000..2ef783f --- /dev/null +++ b/src/mbr.py @@ -0,0 +1,67 @@ +import sys +from os import path +from math import sqrt + +sys.path.insert(0, path.dirname(path.dirname(path.abspath(__file__)))) +from pymorton.pymorton.pymorton import interleave_latlng + +class Point: + def __init__(self, x, y): + self.x = x + self.y = y + +class MBR: + def __init__(self, id, xlow, xhigh, ylow, yhigh): + self.xlow = xlow + self.xhigh = xhigh + self.ylow = ylow + self.yhigh = yhigh + self.id = id + self.obj = False + self.zcurve = self.findZcurve() + + def findZcurve(self): + x_median = (self.xlow + self.xhigh) / 2 + y_median = (self.ylow + self.yhigh) / 2 + return interleave_latlng(y_median, x_median) + + def setDistance(self, p): + self.distance = self.findDistance(p) + + def findDistance(self, p): + dx = dy = 0 + if p.x < self.xlow: + dx = self.xlow - 
p.x + elif p.x > self.xhigh: + dx = p.x - self.xhigh + else: + dx = 0 + + if p.y < self.ylow: + dy = self.ylow - p.y + elif p.y > self.yhigh: + dy = p.y - self.yhigh + else: + dy = 0 + return sqrt(dx**2 + dy**2) + + def makeObject(self): + self.obj = True + + def intersects(self, other): + if self.xlow > other.xhigh or self.xhigh < other.xlow: + return False + if self.ylow > other.yhigh or self.yhigh < other.ylow: + return False + return True + + def __lt__(self, other): + if self.distance < other.distance: + return True + return False + +class Node: + def __init__(self, leaf, id, list_of_mbrs): + self.leaf = leaf + self.id = id + self.list_of_mbrs = list_of_mbrs diff --git a/src/rangequeries.py b/src/rangequeries.py index 1df8905..5b17354 100644 --- a/src/rangequeries.py +++ b/src/rangequeries.py @@ -1,28 +1,9 @@ import sys import ast +from mbr import MBR, Node -class MBR: - def __init__(self, id, xlow, xhigh, ylow, yhigh): - self.xlow = xlow - self.xhigh = xhigh - self.ylow = ylow - self.yhigh = yhigh - self.id = id - - def intersects(self, other): - if self.xlow > other.xhigh or self.xhigh < other.xlow: - return False - if self.ylow > other.yhigh or self.yhigh < other.ylow: - return False - return True - -class Node: - def __init__(self, leaf, id, list_of_mbrs): - self.leaf = leaf - self.id = id - self.list_of_mbrs = list_of_mbrs - -def parseRtree(filename): +# Read R-Tree from data +def parse_tree(filename): with open(filename, 'r') as f: nodes = [] for line in f: @@ -35,14 +16,14 @@ def parseRtree(filename): return nodes -# Read and execute all the range queries assigned to the r tree -def parseQuery(rtree, filename): +# Read and execute all the range queries +def parse_query(rtree, filename): with open(filename, 'r') as f: i = 0 for line in f: [xlow, ylow, xhigh, yhigh] = line.split() results = [] - rangeQuery(rtree, MBR(int(i), float(xlow), float(xhigh), + range_query(rtree, MBR(int(i), float(xlow), float(xhigh), float(ylow), float(yhigh)), results) 
print("{} ({}):".format(i, len(results)), end=' ') @@ -55,18 +36,18 @@ def parseQuery(rtree, filename): i += 1 # Find the intersecting rectangles -def rangeQuery(rtree, window, results, node=None): +def range_query(rtree, window, results, node=None): # At the root node if node == None: node = rtree[-1] for n in node.list_of_mbrs: if window.intersects(n) or n.intersects(window): - rangeQuery(rtree, window, results, rtree[n.id]) + range_query(rtree, window, results, rtree[n.id]) # At an intermediate node elif node.leaf == 1: for n in node.list_of_mbrs: if window.intersects(n) or n.intersects(window): - rangeQuery(rtree, window, results, rtree[n.id]) + range_query(rtree, window, results, rtree[n.id]) # At a leaf node else: for mbr in node.list_of_mbrs: @@ -74,5 +55,5 @@ def rangeQuery(rtree, window, results, node=None): results.append(mbr) if __name__ == '__main__': - rtree = parseRtree(sys.argv[1]) - parseQuery(rtree, sys.argv[2]) + rtree = parse_tree(sys.argv[1]) + parse_query(rtree, sys.argv[2])