def match_page()

in processing/processing/text_merge.py [0:0]


def match_page(words: Dict[str, Any], page: Page) -> MatchedPage:
    """Match words bboxes to annotation bboxes.

    Every item of ``words["objs"]`` whose ``"type"`` is ``"text"`` is
    assigned to the first object of ``page.objs`` that contains it, as
    decided by ``BorderBox.box_is_inside_box`` with ``threshold=0.8``.
    Words that fit no paragraph are left out of the result.

    Args:
        words: Mapping with an ``"objs"`` sequence. Each item provides
            ``"type"``; text items also provide ``"bbox"`` (indexed 0..3)
            and ``"text"``.
        page: Page whose ``objs`` items are mappings with ``"bbox"`` and
            ``"id"`` keys.

    Returns:
        A ``MatchedPage`` for ``page.page_num`` whose ``paragraph_bboxes``
        maps a paragraph id to a ``ParagraphBbox`` holding the matched
        words as dicts with ``x1``, ``y1``, ``x2``, ``y2`` and ``text``.
        Paragraphs without any matched word have no entry.
    """
    matched_page = MatchedPage(page_num=page.page_num, paragraph_bboxes={})

    # A paragraph's box depends only on the paragraph, so build it once
    # here instead of once per (word, paragraph) pair. Coordinates are
    # truncated with int() before the containment test.
    paragraph_boxes = [
        (paragraph, BorderBox(*(int(coord) for coord in paragraph["bbox"])))
        for paragraph in page.objs
    ]

    for raw_word_bbox in words["objs"]:
        if raw_word_bbox["type"] != "text":
            continue
        raw_bbox = raw_word_bbox["bbox"]
        word_bbox = BorderBox(raw_bbox[0], raw_bbox[1], raw_bbox[2], raw_bbox[3])

        for paragraph, paragraph_box in paragraph_boxes:
            if not word_bbox.box_is_inside_box(paragraph_box, threshold=0.8):
                continue

            nest_bbox = {
                "x1": word_bbox.top_left_x,
                "y1": word_bbox.top_left_y,
                "x2": word_bbox.bottom_right_x,
                "y2": word_bbox.bottom_right_y,
                "text": raw_word_bbox["text"],
            }

            # Create the paragraph entry only the first time it is needed;
            # setdefault() would build a throw-away ParagraphBbox per word.
            paragraph_id = paragraph["id"]
            if paragraph_id not in matched_page.paragraph_bboxes:
                matched_page.paragraph_bboxes[paragraph_id] = ParagraphBbox(
                    bbox=paragraph["bbox"], nested_bboxes=[]
                )
            matched_page.paragraph_bboxes[paragraph_id].nested_bboxes.append(
                nest_bbox
            )
            # A word belongs to at most one paragraph: the first match wins.
            break
    return matched_page