From 1bbf4790637643566bf1ab83c74b603b2e83e945 Mon Sep 17 00:00:00 2001
From: stefiosif
Date: Sat, 19 Feb 2022 12:21:42 +0200
Subject: [PATCH] First commit

---
 bulkloading.py  | 187 ++++++++++++++++++++++++++++++++++++++++++++++++
 knnqueries.py   | 144 +++++++++++++++++++++++++++++++++++++
 rangequeries.py | 100 ++++++++++++++++++++++++++
 3 files changed, 431 insertions(+)
 create mode 100644 bulkloading.py
 create mode 100644 knnqueries.py
 create mode 100644 rangequeries.py

diff --git a/bulkloading.py b/bulkloading.py
new file mode 100644
index 0000000..c88e36e
--- /dev/null
+++ b/bulkloading.py
@@ -0,0 +1,187 @@
+from os import write
+import sys
+import csv
+
+# Cell sizes used to emit one Morton digit per step: 180, 90, 45, ...
+_DIVISORS = [180.0 / 2 ** n for n in range(32)]
+
+
+class MBR:
+    """Minimum bounding rectangle carrying an id and a Z-order sort key."""
+
+    def __init__(self, id, xlow, xhigh, ylow, yhigh):
+        self.xlow = xlow
+        self.xhigh = xhigh
+        self.ylow = ylow
+        self.yhigh = yhigh
+        self.id = id
+        self.zcurve = self.findZcurve()
+
+    def findZcurve(self):
+        """Return the Morton code of the rectangle's centre point."""
+        x_median = (self.xlow + self.xhigh) / 2
+        y_median = (self.ylow + self.yhigh) / 2
+        return interleave_latlng(y_median, x_median)
+
+
+def interleave_latlng(lat, lng):
+    """Return a 32-digit base-4 Morton code string for (lat, lng).
+
+    lng is shifted by 180 and lat by 90 (values outside [-180, 180] and
+    [-90, 90] are first wrapped with a modulo).  For every entry of
+    _DIVISORS one digit is emitted: 2 is added when the remaining lat
+    value reaches the divisor and 1 when the remaining lng value does;
+    the divisor is subtracted from whichever value reached it.
+    """
+    if lng > 180:
+        x = (lng % 180) + 180.0
+    elif lng < -180:
+        x = (-((-lng) % 180)) + 180.0
+    else:
+        x = lng + 180.0
+    if lat > 90:
+        y = (lat % 90) + 90.0
+    elif lat < -90:
+        y = (-((-lat) % 90)) + 90.0
+    else:
+        y = lat + 90.0
+
+    # Collect the digits and join once instead of growing a string
+    digits = []
+    for dx in _DIVISORS:
+        digit = 0
+        if y >= dx:
+            digit |= 2
+            y -= dx
+        if x >= dx:
+            digit |= 1
+            x -= dx
+        digits.append(str(digit))
+
+    return "".join(digits)
+
+
+# Given a set of coordinates find the MBR
+def findMBR(objId, points):
+    """Return the MBR of points, a non-empty list of [x, y] pairs."""
+    xs = [p[0] for p in points]
+    ys = [p[1] for p in points]
+    return MBR(objId, min(xs), max(xs), min(ys), max(ys))
+
+
+# Given a set of MBRs find the MBR
+def createMBR(nodeId, mbrs):
+    """Return the MBR that encloses every rectangle in mbrs."""
+    x_min = min(mbr.xlow for mbr in mbrs)
+    x_max = max(mbr.xhigh for mbr in mbrs)
+    y_min = min(mbr.ylow for mbr in mbrs)
+    y_max = max(mbr.yhigh for mbr in mbrs)
+
+    return MBR(nodeId, x_min, x_max, y_min, y_max)
+
+
+# Read data from input files
+def inputReader(filename1, filename2):
+    """Return a list of [object id, points] read from the two input files.
+
+    In each row of filename1, field 0 is kept as the object id and
+    fields 1 and 2 are used as a first/last pair: last - first + 1 lines
+    of filename2 are consumed, each one comma-separated coordinate pair.
+    """
+    set_of_points = []
+    with open(filename1, 'r') as offsets, open(filename2, 'r') as coords:
+        reader = csv.reader(offsets, delimiter=',', quoting=csv.QUOTE_NONE)
+        for row in reader:
+            if not row:
+                # Tolerate blank lines (e.g. at the end of the file)
+                continue
+            count = int(row[2]) - int(row[1]) + 1
+            points = [[float(x) for x in coords.readline().split(',')]
+                      for _ in range(count)]
+            set_of_points.append([row[0], points])
+
+    return set_of_points
+
+
+# Write data into output file
+def outputWriter(filename, rTree):
+    """Write the tree to filename, one node per line, leaf level first.
+
+    A line reads [flag, node id, [[entry id, [xlow, xhigh, ylow, yhigh]],
+    ...]] where flag is 0 for nodes of the first level and 1 otherwise.
+    """
+    with open(filename, 'w') as rtree:
+        numNode = 0
+        for rank, level in enumerate(rTree):
+            isnonleaf = 0 if rank == 0 else 1
+            for node in level:
+                entries = ",".join(
+                    "[{}, [{}, {}, {}, {}]]".format(
+                        mbr.id, mbr.xlow, mbr.xhigh, mbr.ylow, mbr.yhigh)
+                    for mbr in node)
+                rtree.write("[{}, {}, [{}]]\n".format(
+                    isnonleaf, numNode, entries))
+                numNode += 1
+
+
+# Build the R-tree bottom-up from MBRs that are already sorted
+def makeRtree(collection, capacity=20, min_fill=8):
+    """Pack collection into levels of nodes and return the list of levels.
+
+    Every level is a list of nodes and every node a list of MBRs; level 0
+    holds the leaves and the last level holds the single root node.  An
+    empty collection yields an empty list.
+
+    capacity is the maximum number of entries per node and min_fill the
+    minimum the last node of a level is topped up to.
+    """
+    if capacity < 2 or not 0 <= min_fill <= capacity:
+        raise ValueError("need capacity >= 2 and 0 <= min_fill <= capacity")
+
+    level = []
+    nodeId = 0
+    rank = 0
+    # Loop on a non-empty collection (not on "more than one MBR") so that
+    # a single object still produces a root node
+    while collection:
+        # Split the mbr collection into nodes of at most `capacity`
+        nodes = [collection[x:x + capacity]
+                 for x in range(0, len(collection), capacity)]
+
+        # If the last node is under-full, move mbrs over from the previous
+        balance = min_fill - len(nodes[-1])
+        if balance > 0 and len(nodes) > 1:
+            split = len(nodes[-2]) - balance
+            nodes[-1] = nodes[-2][split:] + nodes[-1]
+            nodes[-2] = nodes[-2][:split]
+
+        # The MBR of each node becomes an entry of the next level up
+        collection = []
+        for node in nodes:
+            collection.append(createMBR(nodeId, node))
+            nodeId += 1
+
+        level.append(nodes)
+        print("{} nodes at level {}".format(len(nodes), rank))
+        rank += 1
+
+        # A level made of one node is the root
+        if len(nodes) == 1:
+            break
+
+    return level
+
+
+if __name__ == '__main__':
+    # Read the coordinates of the polygons points
+    set_of_points = inputReader(filename1=sys.argv[1], filename2=sys.argv[2])
+
+    # Create the mbrs of the polygons
+    mbrs = [findMBR(sp[0], sp[1]) for sp in set_of_points]
+
+    # Sort the mbrs based on the Z curve
+    mbrs.sort(key=lambda ld: ld.zcurve)
+
+    # Make the Rtree
+    rTree = makeRtree(mbrs)
+    outputWriter("Rtree.txt", rTree)
diff --git a/knnqueries.py b/knnqueries.py
new file mode 100644
index 0000000..a86e2ef
--- /dev/null
+++ b/knnqueries.py
@@ -0,0 +1,144 @@
+import sys
+import ast
+import heapq as hq
+from math import sqrt
+
+
+class Point:
+    """A 2-D query point."""
+
+    def __init__(self, x, y):
+        self.x = x
+        self.y = y
+
+
+class MBR:
+    """R-tree entry rectangle, ordered by its distance to a query point."""
+
+    def __init__(self, id, xlow, xhigh, ylow, yhigh):
+        self.xlow = xlow
+        self.xhigh = xhigh
+        self.ylow = ylow
+        self.yhigh = yhigh
+        self.id = id
+        # True once the entry is known to be a data object, not a node
+        self.obj = False
+
+    def setDistance(self, qp):
+        """Store the distance from this rectangle to the query point qp."""
+        self.distance = self.findDistance(qp)
+
+    def findDistance(self, qp):
+        """Return the smallest Euclidean distance from qp to the rectangle.
+
+        The result is 0 when qp lies inside the rectangle or on its edge.
+        """
+        if qp.x < self.xlow:
+            dx = self.xlow - qp.x
+        elif qp.x > self.xhigh:
+            dx = qp.x - self.xhigh
+        else:
+            dx = 0
+
+        if qp.y < self.ylow:
+            dy = self.ylow - qp.y
+        elif qp.y > self.yhigh:
+            dy = qp.y - self.yhigh
+        else:
+            dy = 0
+        return sqrt(dx**2 + dy**2)
+
+    def makeObject(self):
+        """Mark this entry as a data object."""
+        self.obj = True
+
+    def __lt__(self, other):
+        # heapq only needs "<"; setDistance must have been called on both
+        return self.distance < other.distance
+
+
+class Node:
+    """One R-tree node: leaf flag (0 = leaf), node id and its entries."""
+
+    def __init__(self, leaf, id, list_of_mbrs):
+        self.leaf = leaf
+        self.id = id
+        self.list_of_mbrs = list_of_mbrs
+
+
+def parseRtree(filename):
+    """Return the list of Nodes stored in filename, one node per line."""
+    nodes = []
+    with open(filename, 'r') as f:
+        for line in f:
+            if not line.strip():
+                # Tolerate blank lines (e.g. at the end of the file)
+                continue
+            node = ast.literal_eval(line)
+            mbrs = [MBR(int(mbr[0]), float(mbr[1][0]), float(mbr[1][1]),
+                        float(mbr[1][2]), float(mbr[1][3]))
+                    for mbr in node[2]]
+            nodes.append(Node(node[0], node[1], mbrs))
+
+    return nodes
+
+
+# Read and execute all the NN queries assigned to the r tree
+def parseQuery(rtree, filename, k):
+    """Run a k-NN query for every "x y" line of filename."""
+    with open(filename, 'r') as f:
+        num = 0
+        for line in f:
+            if not line.strip():
+                continue
+            x, y = line.split()
+            findKNN(rtree, Point(float(x), float(y)), k, num)
+            num += 1
+
+
+# Queue the entries of a node, ordered by their distance to qp
+def _pushEntries(pq, node, qp):
+    for mbr in node.list_of_mbrs:
+        mbr.setDistance(qp)
+        if node.leaf == 0:
+            # Entries of a leaf node are the data objects themselves
+            mbr.makeObject()
+        hq.heappush(pq, mbr)
+
+
+# Search for the nearest k neighbours
+def findKNN(rtree, qp, k, num):
+    """Print "num: id,id,..." for the (up to) k objects nearest to qp.
+
+    Best-first search: entries are taken from a priority queue in order
+    of distance; a node entry is replaced by the entries of that node and
+    an object entry is reported.  Fewer than k ids are printed when the
+    tree holds fewer than k objects.
+    """
+    # Heap of the nearest objects found so far
+    results = []
+    # Priority queue of entries still to visit
+    pq = []
+    # Start from the root, which is the last node of the file.  It is
+    # expanded like any other node so a root that is a leaf also works.
+    if rtree:
+        _pushEntries(pq, rtree[-1], qp)
+
+    while pq and len(results) < k:
+        # Retrieve the next entry and remove it from the queue
+        e = hq.heappop(pq)
+
+        if e.obj:
+            hq.heappush(results, e)
+        else:
+            _pushEntries(pq, rtree[e.id], qp)
+
+    # Pop only what was found; popping k items from a shorter heap
+    # would raise IndexError
+    ids = [str(hq.heappop(results).id) for _ in range(len(results))]
+    print("{}: {}".format(num, ",".join(ids)))
+
+
+if __name__ == '__main__':
+    rtree = parseRtree(sys.argv[1])
+    parseQuery(rtree, sys.argv[2], int(sys.argv[3]))
diff --git a/rangequeries.py b/rangequeries.py
new file mode 100644
index 0000000..1df8905
--- /dev/null
+++ b/rangequeries.py
@@ -0,0 +1,100 @@
+import sys
+import ast
+
+
+class MBR:
+    """Axis-aligned rectangle with an id."""
+
+    def __init__(self, id, xlow, xhigh, ylow, yhigh):
+        self.xlow = xlow
+        self.xhigh = xhigh
+        self.ylow = ylow
+        self.yhigh = yhigh
+        self.id = id
+
+    def intersects(self, other):
+        """Return True when this rectangle and other overlap or touch."""
+        if self.xlow > other.xhigh or self.xhigh < other.xlow:
+            return False
+        if self.ylow > other.yhigh or self.yhigh < other.ylow:
+            return False
+        return True
+
+
+class Node:
+    """One R-tree node: flag (1 = intermediate), node id and its entries."""
+
+    def __init__(self, leaf, id, list_of_mbrs):
+        self.leaf = leaf
+        self.id = id
+        self.list_of_mbrs = list_of_mbrs
+
+
+def parseRtree(filename):
+    """Return the list of Nodes stored in filename, one node per line."""
+    nodes = []
+    with open(filename, 'r') as f:
+        for line in f:
+            if not line.strip():
+                # Tolerate blank lines (e.g. at the end of the file)
+                continue
+            node = ast.literal_eval(line)
+            mbrs = [MBR(int(mbr[0]), float(mbr[1][0]), float(mbr[1][1]),
+                        float(mbr[1][2]), float(mbr[1][3]))
+                    for mbr in node[2]]
+            nodes.append(Node(node[0], node[1], mbrs))
+
+    return nodes
+
+
+# Read and execute all the range queries assigned to the r tree
+def parseQuery(rtree, filename):
+    """Run a range query for every "xlow ylow xhigh yhigh" line of filename.
+
+    Prints "i (count): id,id,..." per query, i being the query number.
+    """
+    with open(filename, 'r') as f:
+        i = 0
+        for line in f:
+            if not line.strip():
+                continue
+            xlow, ylow, xhigh, yhigh = line.split()
+            results = []
+            rangeQuery(rtree, MBR(int(i), float(xlow), float(xhigh),
+                                  float(ylow), float(yhigh)), results)
+
+            # An empty answer is printed as a single blank
+            ids = ",".join(str(r.id) for r in results) if results else " "
+            print("{} ({}): {}".format(i, len(results), ids))
+            i += 1
+
+
+# Find the intersecting rectangles
+def rangeQuery(rtree, window, results, node=None):
+    """Append to results every leaf entry whose rectangle meets window.
+
+    node is the node to search; None (the default) starts at the root,
+    i.e. the last node of rtree.
+    """
+    if node is None:
+        if not rtree:
+            return
+        # The root is dispatched on its flag like any other node, so a
+        # tree whose root is a leaf is not mistaken for an index level
+        node = rtree[-1]
+
+    for mbr in node.list_of_mbrs:
+        # intersects() is symmetric, one call is enough
+        if not window.intersects(mbr):
+            continue
+        if node.leaf == 1:
+            # At an intermediate node: descend into the child
+            rangeQuery(rtree, window, results, rtree[mbr.id])
+        else:
+            # At a leaf node: the entry is a result
+            results.append(mbr)
+
+
+if __name__ == '__main__':
+    rtree = parseRtree(sys.argv[1])
+    parseQuery(rtree, sys.argv[2])