Skip to content

Rect

BlockFinder

Source code in Docs2KG/utils/rect.py
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
class BlockFinder:
    """
    Locate the text blocks that sit closest to an image block.

    Every bounding box is handled as a string such as ``"(x0, y0, x1, y1)"``
    and converted to floats through :meth:`parse_bbox`.
    """

    @classmethod
    def find_closest_blocks(
        cls,
        block_rect: str,
        text_blocks: List[str],
    ) -> dict:
        """
        Find the closest text block to the image block in each direction.

        TODO:
        - Based on how close the image is to the margin, we can decide whether it is an important image or not
        - Based on the size of the image, we can determine whether we need the left, right information

        Args:
            block_rect (str): The bounding box of the image block as a string
            text_blocks (List[str]): The bounding boxes of the text blocks as strings

        Returns:
            dict: Keys ``"left"``, ``"right"``, ``"above"`` and ``"below"``.
                Each value is the position (a built-in ``int``) inside
                ``text_blocks`` of the closest block in that direction, or
                ``None`` when no block qualifies. When ``text_blocks`` is
                empty every value is ``None``.
        """
        closest = {"left": None, "right": None, "above": None, "below": None}

        rows = [
            {**cls.bbox_distance(block_rect, text_block), "index": i}
            for i, text_block in enumerate(text_blocks)
        ]
        if not rows:
            # A DataFrame built from no rows has no columns, so the column
            # lookups below would raise KeyError; there is nothing to search.
            return closest

        df = pd.DataFrame(rows)

        # (direction label, column holding the label, column holding the gap)
        lookups = (
            ("above", "position_vertical", "min_vertical_distance"),
            ("below", "position_vertical", "min_vertical_distance"),
            ("left", "position_horizontal", "min_horizontal_distance"),
            ("right", "position_horizontal", "min_horizontal_distance"),
        )
        for direction, position_column, distance_column in lookups:
            # The stored gaps are signed; only their magnitude matters here.
            gaps = df.loc[df[position_column] == direction, distance_column].abs()
            # Candidates whose gap is not greater than 1 are ignored
            # (presumably blocks touching or overlapping the image edge).
            gaps = gaps[gaps > 1]
            if not gaps.empty:
                # idxmin returns the first row label with the smallest gap, so
                # ties resolve to the earliest text block. Convert to a plain
                # int so the result does not leak numpy scalar types.
                closest[direction] = int(df.loc[gaps.idxmin(), "index"])

        return closest

    @classmethod
    def parse_bbox(cls, s):
        """
        Parse a bounding box string into a tuple of floats.

        Args:
            s (str): A Python literal sequence of numbers,
                e.g. ``"(x0, y0, x1, y1)"``.

        Returns:
            tuple: The coordinates converted to ``float``.
        """
        # literal_eval only accepts Python literals, so no code is executed.
        return tuple(float(value) for value in ast.literal_eval(s))

    @classmethod
    def bbox_distance(cls, bbox_a, bbox_b) -> dict:
        """
        Calculate the minimum distance between two bounding boxes.
        The (0,0) point is the top-left corner of the image.

        So within the bbox

        x0, y0 = top-left corner
        x1, y1 = bottom-right corner
        x0 <= x1
        y0 <= y1

        Args:
            bbox_a: The reference bounding box as a string
            bbox_b: The bounding box to compare against, as a string

        Returns:
            dict: with the keys

            - ``min_horizontal_distance``: the signed x-edge difference
              (an edge of a minus an edge of b) with the smallest magnitude
            - ``min_vertical_distance``: the same for the y edges
            - ``min_distance``: the smaller of the two magnitudes
            - ``position_horizontal``: ``"left"`` when the center of bbox_b is
              to the left of the center of bbox_a, otherwise ``"right"``
            - ``position_vertical``: ``"above"`` when the center of bbox_b is
              above the center of bbox_a, otherwise ``"below"``
        """
        parsed_a = cls.parse_bbox(bbox_a)
        parsed_b = cls.parse_bbox(bbox_b)

        x0_a, y0_a, x1_a, y1_a = parsed_a
        x0_b, y0_b, x1_b, y1_b = parsed_b

        # Every pairing of an edge of a with an edge of b along each axis.
        horizontal_gaps = (x0_a - x0_b, x1_a - x0_b, x0_a - x1_b, x1_a - x1_b)
        vertical_gaps = (y0_a - y1_b, y1_a - y0_b, y0_a - y0_b, y1_a - y1_b)

        # Pick the gap with the smallest magnitude but keep its sign;
        # on ties the first pairing in the tuple wins.
        nearest_horizontal = min(horizontal_gaps, key=abs)
        nearest_vertical = min(vertical_gaps, key=abs)

        # Direction is decided by comparing the box centers, with bbox_a as
        # the reference point. Equal centers fall into "right" / "below".
        center_x_diff = (x0_a + x1_a) / 2 - (x0_b + x1_b) / 2
        center_y_diff = (y0_a + y1_a) / 2 - (y0_b + y1_b) / 2

        return {
            "min_horizontal_distance": nearest_horizontal,
            "min_vertical_distance": nearest_vertical,
            "min_distance": min(abs(nearest_horizontal), abs(nearest_vertical)),
            # a is right of b -> b lies on the left
            "position_horizontal": "left" if center_x_diff > 0 else "right",
            # a is below b (y grows downwards) -> b lies above
            "position_vertical": "above" if center_y_diff > 0 else "below",
        }

bbox_distance(bbox_a, bbox_b) classmethod

Calculate the minimum distance between two bounding boxes. The (0,0) point is the top-left corner of the image.

So within the bbox

x0, y0 = top-left corner; x1, y1 = bottom-right corner; x0 <= x1 and y0 <= y1.

Parameters:

Name Type Description Default
bbox_a
required
bbox_b
required

Returns:

Source code in Docs2KG/utils/rect.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
@classmethod
def bbox_distance(cls, bbox_a, bbox_b) -> dict:
    """
    Measure how far apart two bounding boxes are and where the second one
    lies relative to the first.

    Coordinates use the image convention: (0,0) is the top-left corner, so
    for each box

    x0, y0 = top-left corner
    x1, y1 = bottom-right corner
    x0 <= x1
    y0 <= y1

    Args:
        bbox_a: The reference bounding box as a string
        bbox_b: The bounding box to compare against, as a string

    Returns:
        dict: the signed edge difference of smallest magnitude per axis
        ("min_horizontal_distance", "min_vertical_distance"), the smaller of
        the two magnitudes ("min_distance"), and the side of bbox_a on which
        bbox_b lies ("position_horizontal", "position_vertical").
    """
    parsed_a = cls.parse_bbox(bbox_a)
    parsed_b = cls.parse_bbox(bbox_b)

    ax0, ay0, ax1, ay1 = parsed_a
    bx0, by0, bx1, by1 = parsed_b

    # All edge-to-edge differences (a minus b) along each axis.
    horizontal_gaps = (ax0 - bx0, ax1 - bx0, ax0 - bx1, ax1 - bx1)
    vertical_gaps = (ay0 - by1, ay1 - by0, ay0 - by0, ay1 - by1)

    # Smallest magnitude wins, sign preserved; first candidate wins a tie.
    nearest_horizontal = min(horizontal_gaps, key=abs)
    nearest_vertical = min(vertical_gaps, key=abs)

    # The side is decided from the box centers, taking bbox_a as reference.
    offset_x = (ax0 + ax1) / 2 - (bx0 + bx1) / 2
    offset_y = (ay0 + ay1) / 2 - (by0 + by1) / 2

    if offset_x > 0:
        # a sits to the right of b, hence b is on the left
        side_horizontal = "left"
    else:
        side_horizontal = "right"

    if offset_y > 0:
        # a sits lower than b (y grows downwards), hence b is above
        side_vertical = "above"
    else:
        side_vertical = "below"

    return {
        "min_horizontal_distance": nearest_horizontal,
        "min_vertical_distance": nearest_vertical,
        "min_distance": min(abs(nearest_horizontal), abs(nearest_vertical)),
        "position_horizontal": side_horizontal,
        "position_vertical": side_vertical,
    }

find_closest_blocks(block_rect, text_blocks) classmethod

Find the closest text block to the image block

TODO: - Based on how close the image is to the margin, we can decide whether it is an important image or not. - Based on the size of the image, we can determine whether we need the left and right information.

Parameters:

Name Type Description Default
block_rect str

The bounding box of the image block as a string

required
text_blocks List[str]

The bounding boxes of the text blocks as strings

required

Returns:

Name Type Description
dict dict

The index of the closest text block to the image block in each direction

Source code in Docs2KG/utils/rect.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
@classmethod
def find_closest_blocks(
    cls,
    block_rect: str,
    text_blocks: List[str],
) -> dict:
    """
    Find the closest text block to the image block in each direction.

    TODO:
    - Based on how close the image is to the margin, we can decide whether it is an important image or not
    - Based on the size of the image, we can determine whether we need the left, right information

    Args:
        block_rect (str): The bounding box of the image block as a string
        text_blocks (List[str]): The bounding boxes of the text blocks as strings

    Returns:
        dict: Keys ``"left"``, ``"right"``, ``"above"`` and ``"below"``.
            Each value is the position (a built-in ``int``) inside
            ``text_blocks`` of the closest block in that direction, or
            ``None`` when no block qualifies. When ``text_blocks`` is
            empty every value is ``None``.
    """
    closest = {"left": None, "right": None, "above": None, "below": None}

    rows = [
        {**cls.bbox_distance(block_rect, text_block), "index": i}
        for i, text_block in enumerate(text_blocks)
    ]
    if not rows:
        # A DataFrame built from no rows has no columns, so the column
        # lookups below would raise KeyError; there is nothing to search.
        return closest

    df = pd.DataFrame(rows)

    # (direction label, column holding the label, column holding the gap)
    lookups = (
        ("above", "position_vertical", "min_vertical_distance"),
        ("below", "position_vertical", "min_vertical_distance"),
        ("left", "position_horizontal", "min_horizontal_distance"),
        ("right", "position_horizontal", "min_horizontal_distance"),
    )
    for direction, position_column, distance_column in lookups:
        # The stored gaps are signed; only their magnitude matters here.
        gaps = df.loc[df[position_column] == direction, distance_column].abs()
        # Candidates whose gap is not greater than 1 are ignored
        # (presumably blocks touching or overlapping the image edge).
        gaps = gaps[gaps > 1]
        if not gaps.empty:
            # idxmin returns the first row label with the smallest gap, so
            # ties resolve to the earliest text block. Convert to a plain
            # int so the result does not leak numpy scalar types.
            closest[direction] = int(df.loc[gaps.idxmin(), "index"])

    return closest

parse_bbox(s) classmethod

Parse a bounding box string into a tuple of floats.

Source code in Docs2KG/utils/rect.py
108
109
110
111
@classmethod
def parse_bbox(cls, s):
    """Turn a bounding box literal such as "(x0, y0, x1, y1)" into a tuple of floats."""
    # literal_eval only evaluates Python literals, never arbitrary code.
    coordinates = ast.literal_eval(s)
    return tuple(float(value) for value in coordinates)