Skip to content

Json2triplets

JSON2Triplets

Convert JSON to triplets

A JSON for all nodes:

{ "nodes": [ { "uuid": uuid1, "labels": ["label1", "label2"], "properties": { "prop1": "value1", "prop2": "value2" } }, { "uuid": uuid2, "labels": ["label3"], "properties": { "prop3": "value3", "prop4": "value4" } } ], "relationships": [ { "start_node": uuid1, "end_node": uuid2, "type": "type1", "properties": { "prop5": "value5", "prop6": "value6" } } ] }

Source code in Docs2KG/kg/utils/json2triplets.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
class JSON2Triplets:
    """
    Convert JSON to triplets

    The layout knowledge graph is read from ``<folder_path>/kg/layout_kg.json``
    and flattened into a list of nodes and a list of relationships, which
    ``export_json`` writes to ``<folder_path>/kg/triplets_kg.json``.

    A JSON for all nodes:

    {
        "nodes": [
            {
                "uuid": uuid1,
                "labels": ["label1", "label2"],
                "properties": {
                    "prop1": "value1",
                    "prop2": "value2"
                }
            },
            {
                "uuid": uuid2,
                "labels": ["label3"],
                "properties": {
                    "prop3": "value3",
                    "prop4": "value4"
                }
            }
        ],
        "relationships": [
            {
                "start_node": uuid1,
                "end_node": uuid2,
                "type": "type1",
                "properties": {
                    "prop5": "value5",
                    "prop6": "value6"
                }
            }
        ]
    }

    """

    def __init__(self, folder_path: Path):
        """
        Load the layout knowledge graph and prepare empty triplet containers

        Args:
            folder_path (Path): Folder that contains the ``kg`` sub-folder
        """
        self.folder_path = folder_path
        self.kg_folder = folder_path / "kg"
        self.kg_json = self.load_kg()
        self.triplets_json = {"nodes": [], "relationships": []}
        # entity text -> uuid, so the same entity text always maps to one uuid
        self.entities_mapping = {}

    def transform(self):
        """
        Transform the JSON to triplets

        Runs the layout tree, image, table and Text2KG passes in that order
        and then exports the result.
        """

        self.transform_node(self.kg_json)

        self.transform_images()
        self.transform_tables()
        self.transform_text2kg(self.kg_json)

        self.export_json()

    def transform_node(self, node: dict, parent_uuid: str = None):
        """
        Transform the node to triplets

        Adds the node itself, a HAS_CHILD relationship from its parent, and a
        BEFORE relationship between each pair of consecutive children, then
        recurses into every child that is not a text_block.

        Args:
            node (dict): The node
            parent_uuid (str): The UUID of the parent node, None for the root

        Returns:

        """
        node_uuid = node["uuid"]
        entity = {
            "uuid": node_uuid,
            "labels": [node["node_type"].upper()],
            # text2kg is handled separately, so it is dropped from the copy
            "properties": self.clean_nested_properties(node["node_properties"]),
        }
        self.triplets_json["nodes"].append(entity)

        # The root has no parent: a HAS_CHILD with a null start node would be
        # a relationship that points at nothing
        if parent_uuid is not None:
            self.triplets_json["relationships"].append(
                {
                    "start_node": parent_uuid,
                    "end_node": node_uuid,
                    "type": "HAS_CHILD",
                }
            )

        # Leaf nodes may not carry a "children" key at all
        children = node.get("children", [])
        for index, child in enumerate(children):
            if index > 0:
                self.triplets_json["relationships"].append(
                    {
                        "start_node": children[index - 1]["uuid"],
                        "end_node": child["uuid"],
                        "type": "BEFORE",
                    }
                )

            # if the children is text_block, then stop here
            if child["node_type"] == "text_block":
                continue
            self.transform_node(child, parent_uuid=node_uuid)

    @staticmethod
    def clean_nested_properties(properties: dict):
        """
        Clean the nested properties

        Args:
            properties (dict): The node properties, left untouched

        Returns:
            dict: A deep copy of the properties without the "text2kg" entry

        """
        copied_properties = deepcopy(properties)
        copied_properties.pop("text2kg", None)
        return copied_properties

    def transform_images(self):
        """
        Connect the image to nearby text
        """
        self._link_nearby_text("image")

    def transform_tables(self):
        """
        Connect the table to nearby text, in a format can be used in neo4j, etc.
        Returns:

        """
        self._link_nearby_text("table_csv")

    def _link_nearby_text(self, node_type: str):
        """
        Connect every page-level node of one type to its surrounding text

        For each matching node this adds MENTIONED_IN relationships to the
        uuids listed in its "mentioned_in" property, and for each text_block
        child a TEXT_BLOCK node, a NEARBY_TEXT relationship and the
        TEXT_LINKAGE relationships listed in the child's "linkage".

        Args:
            node_type (str): The node_type to look for, e.g. "image"

        Returns:

        """
        nodes = self.triplets_json["nodes"]
        relationships = self.triplets_json["relationships"]
        for page in self.kg_json.get("children", []):
            for node in page.get("children", []):
                if node["node_type"] != node_type:
                    continue
                anchor_uuid = node["uuid"]
                # first add where the node is mentioned
                for mention_uuid in node["node_properties"].get("mentioned_in", []):
                    relationships.append(
                        {
                            "start_node": anchor_uuid,
                            "end_node": mention_uuid,
                            "type": "MENTIONED_IN",
                        }
                    )
                # then add the nearby text block
                for child in node.get("children", []):
                    if child["node_type"] != "text_block":
                        continue
                    text_block_uuid = child["uuid"]
                    nodes.append(
                        {
                            "uuid": text_block_uuid,
                            "labels": ["TEXT_BLOCK"],
                            "properties": self.clean_nested_properties(
                                child["node_properties"]
                            ),
                        }
                    )
                    relationships.append(
                        {
                            "start_node": anchor_uuid,
                            "end_node": text_block_uuid,
                            "type": "NEARBY_TEXT",
                        }
                    )
                    # include where the text block belong to the tree
                    for linkage_uuid in child.get("linkage", []):
                        relationships.append(
                            {
                                "start_node": text_block_uuid,
                                "end_node": linkage_uuid,
                                "type": "TEXT_LINKAGE",
                            }
                        )

    @staticmethod
    def _sanitize_identifier(name: str) -> str:
        """
        Keep only letters, digits and spaces, then turn spaces into underscores
        """
        kept = "".join(ch for ch in name if ch.isalnum() or ch == " ")
        return kept.replace(" ", "_")

    @classmethod
    def _clean_text2kg(cls, text2kg: dict):
        """
        Validate and normalise one Text2KG triple

        Args:
            text2kg (dict): Raw triple with subject, subject_ner_type,
                predicate, object and object_ner_type

        Returns:
            tuple: (subject, subject_label, predicate, object, object_label),
                or None when the triple has to be skipped

        """
        if not isinstance(text2kg, dict):
            return None
        raw_values = [
            text2kg.get(key)
            for key in (
                "subject",
                "subject_ner_type",
                "predicate",
                "object",
                "object_ner_type",
            )
        ]
        # missing or non-text fields cannot be stripped or used as labels
        if not all(isinstance(value, str) for value in raw_values):
            return None
        subject, subject_ner_type, predicate, object_ent, object_ner_type = (
            value.strip() for value in raw_values
        )
        predicate = cls._sanitize_identifier(predicate)
        subject_label = cls._sanitize_identifier(subject_ner_type).upper()
        object_label = cls._sanitize_identifier(object_ner_type).upper()
        identifiers = (predicate, subject_label, object_label)
        # checked after sanitising: a name made only of symbols ends up empty
        if not all((subject, object_ent) + identifiers):
            return None
        # relationship types and labels should not start with number
        if any(name[0].isdigit() for name in identifiers):
            return None
        return subject, subject_label, predicate, object_ent, object_label

    def _register_entity(self, parent_uuid: str, text: str, label: str) -> str:
        """
        Return the uuid of an entity, creating its node on first sight

        Args:
            parent_uuid (str): Start node of the HAS_ENTITY relationship
            text (str): The entity text, used as the identity of the entity
            label (str): The sanitised NER label of the entity

        Returns:
            str: The uuid assigned to the entity text

        """
        if text in self.entities_mapping:
            return self.entities_mapping[text]
        entity_uuid = str(uuid4())
        self.entities_mapping[text] = entity_uuid
        self.triplets_json["relationships"].append(
            {
                "start_node": parent_uuid,
                "end_node": entity_uuid,
                "type": "HAS_ENTITY",
            }
        )
        # one node per uuid: repeated mentions reuse the node created here
        self.triplets_json["nodes"].append(
            {
                "uuid": entity_uuid,
                "labels": ["ENTITY", label, "TEXT2KG"],
                "properties": {"text": text},
            }
        )
        return entity_uuid

    def transform_text2kg(self, node: dict):
        """

        Loop through the kg, and then figure out the Text2KG part, get them into the triplets

        Each Text2KG entity gets a uuid, and entities with the same text share
        the same uuid and the same node. A triple is validated completely
        before anything is written, so a rejected triple leaves no trace.

        Args:
            node (dict): The node whose children are scanned; it is the start
                node of the HAS_ENTITY relationships

        Returns:

        """
        for child in node.get("children", []):
            if "children" in child:
                self.transform_text2kg(child)
            for text2kg in child["node_properties"].get("text2kg") or []:
                cleaned = self._clean_text2kg(text2kg)
                if cleaned is None:
                    continue
                subject, subject_label, predicate, object_ent, object_label = cleaned
                logger.info(f"Text2KG: {text2kg}")
                subject_uuid = self._register_entity(
                    node["uuid"], subject, subject_label
                )
                object_uuid = self._register_entity(
                    node["uuid"], object_ent, object_label
                )
                self.triplets_json["relationships"].append(
                    {
                        "start_node": subject_uuid,
                        "end_node": object_uuid,
                        "type": predicate,
                        "properties": {"source": "TEXT2KG"},
                    }
                )

    def load_kg(self) -> dict:
        """
        Load the layout knowledge graph from JSON

        Returns:
            dict: The content of ``layout_kg.json`` in the kg folder
        """
        # explicit encoding: do not depend on the locale of the machine
        with open(self.kg_folder / "layout_kg.json", "r", encoding="utf-8") as f:
            kg_json = json.load(f)
        return kg_json

    def export_json(self):
        """
        Export the triplets JSON

        Logs the node and relationship counts and writes the triplets to
        ``triplets_kg.json`` in the kg folder.
        """
        # how many nodes
        logger.info(f"Number of nodes: {len(self.triplets_json['nodes'])}")
        # how many relationships
        logger.info(
            f"Number of relationships: {len(self.triplets_json['relationships'])}"
        )
        with open(self.kg_folder / "triplets_kg.json", "w", encoding="utf-8") as f:
            json.dump(self.triplets_json, f, indent=4)
        logger.info(f"Triplets JSON exported to {self.kg_folder / 'triplets_kg.json'}")

clean_nested_properties(properties) staticmethod

Clean the nested properties Args: properties:

Returns:

Source code in Docs2KG/kg/utils/json2triplets.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
@staticmethod
def clean_nested_properties(properties: dict):
    """
    Clean the nested properties
    Args:
        properties:

    Returns:

    """
    copied_properties = deepcopy(properties)
    if "text2kg" in copied_properties:
        copied_properties.pop("text2kg")
    return copied_properties

export_json()

Export the triplets JSON

Source code in Docs2KG/kg/utils/json2triplets.py
378
379
380
381
382
383
384
385
386
387
388
389
390
def export_json(self):
    """
    Write the collected triplets to ``triplets_kg.json`` in the kg folder

    The number of nodes and relationships is logged before the file is
    written, and the output location is logged afterwards.
    """
    node_count = len(self.triplets_json["nodes"])
    logger.info(f"Number of nodes: {node_count}")

    relationship_count = len(self.triplets_json["relationships"])
    logger.info(f"Number of relationships: {relationship_count}")

    output_path = self.kg_folder / "triplets_kg.json"
    with open(output_path, "w") as f:
        json.dump(self.triplets_json, f, indent=4)
    logger.info(f"Triplets JSON exported to {output_path}")

load_kg()

Load the layout knowledge graph from JSON

Source code in Docs2KG/kg/utils/json2triplets.py
370
371
372
373
374
375
376
def load_kg(self) -> dict:
    """
    Load the layout knowledge graph from JSON

    Reads ``layout_kg.json`` from ``self.kg_folder``.

    Returns:
        dict: The parsed layout knowledge graph
    """
    # Read as UTF-8 explicitly so non-ASCII text does not depend on the
    # locale encoding of the machine that runs the conversion
    with open(self.kg_folder / "layout_kg.json", "r", encoding="utf-8") as f:
        return json.load(f)

transform()

Transform the JSON to triplets

Source code in Docs2KG/kg/utils/json2triplets.py
58
59
60
61
62
63
64
65
66
67
68
69
def transform(self):
    """
    Run the whole JSON to triplets conversion

    The stages run in a fixed order: layout tree, images, tables, Text2KG,
    and finally the export of the collected triplets.
    """
    stages = (
        (self.transform_node, (self.kg_json,)),
        (self.transform_images, ()),
        (self.transform_tables, ()),
        (self.transform_text2kg, (self.kg_json,)),
        (self.export_json, ()),
    )
    for stage, arguments in stages:
        stage(*arguments)

transform_images()

Connect the image to nearby text

Source code in Docs2KG/kg/utils/json2triplets.py
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
def transform_images(self):
    """
    Link every image to the text that mentions it and to its nearby text

    For each image found directly under a page this adds MENTIONED_IN
    relationships for the uuids in its "mentioned_in" property. Each
    text_block child of the image becomes a TEXT_BLOCK node with a
    NEARBY_TEXT relationship from the image, plus one TEXT_LINKAGE
    relationship per uuid in the child's "linkage".
    """
    for page in self.kg_json["children"]:
        for candidate in page["children"]:
            if candidate["node_type"] != "image":
                continue
            image_uuid = candidate["uuid"]

            # where the image is mentioned
            for mention_uuid in candidate["node_properties"].get("mentioned_in", []):
                self.triplets_json["relationships"].append(
                    {
                        "start_node": image_uuid,
                        "end_node": mention_uuid,
                        "type": "MENTIONED_IN",
                    }
                )

            # an image without children has no nearby text to add
            for block in candidate.get("children", ()):
                if block["node_type"] != "text_block":
                    continue
                block_uuid = block["uuid"]
                block_properties = self.clean_nested_properties(
                    block["node_properties"]
                )
                self.triplets_json["nodes"].append(
                    {
                        "uuid": block_uuid,
                        "labels": ["TEXT_BLOCK"],
                        "properties": block_properties,
                    }
                )
                self.triplets_json["relationships"].append(
                    {
                        "start_node": image_uuid,
                        "end_node": block_uuid,
                        "type": "NEARBY_TEXT",
                    }
                )

                # where the text block belongs in the tree
                for linkage_uuid in block.get("linkage", []):
                    self.triplets_json["relationships"].append(
                        {
                            "start_node": block_uuid,
                            "end_node": linkage_uuid,
                            "type": "TEXT_LINKAGE",
                        }
                    )

transform_node(node, parent_uuid=None)

Transform the node to triplets

For the relationship part.

Also need to consider the relation within the for loop.

Before and After

Parameters:

Name Type Description Default
node dict

The node

required
parent_uuid str

The UUID of the parent node (None for the root node)

None

Returns:

Source code in Docs2KG/kg/utils/json2triplets.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def transform_node(self, node: dict, parent_uuid: str = None):
    """
    Transform the node to triplets

    Adds the node itself, a HAS_CHILD relationship from its parent, and a
    BEFORE relationship between each pair of consecutive children, then
    recurses into every child that is not a text_block.

    Args:
        node (dict): The node
        parent_uuid (str): The UUID of the parent node, None for the root

    Returns:

    """
    node_uuid = node["uuid"]
    entity = {
        "uuid": node_uuid,
        "labels": [node["node_type"].upper()],
        # the properties are deep copied with the text2kg entry removed
        "properties": self.clean_nested_properties(node["node_properties"]),
    }
    self.triplets_json["nodes"].append(entity)

    # The root has no parent: a HAS_CHILD with a null start node would be a
    # relationship that points at nothing
    if parent_uuid is not None:
        self.triplets_json["relationships"].append(
            {
                "start_node": parent_uuid,
                "end_node": node_uuid,
                "type": "HAS_CHILD",
            }
        )

    # Leaf nodes may not carry a "children" key at all
    children = node.get("children", [])
    for index, child in enumerate(children):
        if index > 0:
            self.triplets_json["relationships"].append(
                {
                    "start_node": children[index - 1]["uuid"],
                    "end_node": child["uuid"],
                    "type": "BEFORE",
                }
            )

        # if the children is text_block, then stop here
        if child["node_type"] == "text_block":
            continue
        self.transform_node(child, parent_uuid=node_uuid)

transform_tables()

Connect each table to the text that mentions it and to its nearby text blocks, in a format that can be used in Neo4j, etc. Returns:

Source code in Docs2KG/kg/utils/json2triplets.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
def transform_tables(self):
    """
    Link every table to the text that mentions it and to its nearby text

    For each table_csv node found directly under a page this adds
    MENTIONED_IN relationships for the uuids in its "mentioned_in"
    property. Each text_block child of the table becomes a TEXT_BLOCK node
    with a NEARBY_TEXT relationship from the table, plus one TEXT_LINKAGE
    relationship per uuid in the child's "linkage".

    Returns:

    """
    for page in self.kg_json["children"]:
        for candidate in page["children"]:
            if candidate["node_type"] != "table_csv":
                continue
            table_uuid = candidate["uuid"]

            # where the table is mentioned
            for mention_uuid in candidate["node_properties"].get("mentioned_in", []):
                self.triplets_json["relationships"].append(
                    {
                        "start_node": table_uuid,
                        "end_node": mention_uuid,
                        "type": "MENTIONED_IN",
                    }
                )

            # a table without children has no nearby text to add
            for block in candidate.get("children", ()):
                if block["node_type"] != "text_block":
                    continue
                block_uuid = block["uuid"]
                block_properties = self.clean_nested_properties(
                    block["node_properties"]
                )
                self.triplets_json["nodes"].append(
                    {
                        "uuid": block_uuid,
                        "labels": ["TEXT_BLOCK"],
                        "properties": block_properties,
                    }
                )
                self.triplets_json["relationships"].append(
                    {
                        "start_node": table_uuid,
                        "end_node": block_uuid,
                        "type": "NEARBY_TEXT",
                    }
                )

                # where the text block belongs in the tree
                for linkage_uuid in block.get("linkage", []):
                    self.triplets_json["relationships"].append(
                        {
                            "start_node": block_uuid,
                            "end_node": linkage_uuid,
                            "type": "TEXT_LINKAGE",
                        }
                    )

transform_text2kg(node)

Loop through the kg, and then figure out the Text2KG part, get them into the triplets

However, before that we need to give each Text2KG node a UUID, and nodes with the same content should share the same UUID.

Returns:

Source code in Docs2KG/kg/utils/json2triplets.py
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
def transform_text2kg(self, node: dict):
    """

    Loop through the kg, and then figure out the Text2KG part, get them into the triplets

    Each Text2KG entity gets a uuid, and entities with the same text share
    the same uuid and the same node. A triple is cleaned and validated
    completely before anything is written, so a rejected triple leaves no
    entity mapping, node or relationship behind.

    Args:
        node (dict): The node whose children are scanned; it is the start
            node of the HAS_ENTITY relationships

    Returns:

    """

    def sanitize(name: str) -> str:
        # keep letters, digits and spaces only; spaces become underscores
        kept = "".join(ch for ch in name if ch.isalnum() or ch == " ")
        return kept.replace(" ", "_")

    def register_entity(text: str, label: str) -> str:
        # the entity text is the identity: reuse the uuid when already known
        if text in self.entities_mapping:
            return self.entities_mapping[text]
        entity_uuid = str(uuid4())
        self.entities_mapping[text] = entity_uuid
        # add the entity rel to the parent
        self.triplets_json["relationships"].append(
            {
                "start_node": node["uuid"],
                "end_node": entity_uuid,
                "type": "HAS_ENTITY",
            }
        )
        # one node per uuid: repeated mentions reuse the node created here
        self.triplets_json["nodes"].append(
            {
                "uuid": entity_uuid,
                "labels": ["ENTITY", label, "TEXT2KG"],
                "properties": {"text": text},
            }
        )
        return entity_uuid

    field_names = (
        "subject",
        "subject_ner_type",
        "predicate",
        "object",
        "object_ner_type",
    )
    for child in node.get("children", []):
        if "children" in child:
            self.transform_text2kg(child)
        for text2kg in child["node_properties"].get("text2kg") or []:
            if not isinstance(text2kg, dict):
                continue
            raw_values = [text2kg.get(name) for name in field_names]
            # missing or non-text fields cannot be stripped or used as labels
            if not all(isinstance(value, str) for value in raw_values):
                continue
            subject, subject_ner_type, predicate, object_ent, object_ner_type = (
                value.strip() for value in raw_values
            )
            predicate = sanitize(predicate)
            subject_label = sanitize(subject_ner_type).upper()
            object_label = sanitize(object_ner_type).upper()
            identifiers = (predicate, subject_label, object_label)
            # checked after sanitising: a name made only of symbols ends up empty
            if not all((subject, object_ent) + identifiers):
                continue
            # relationship types and labels should not start with number
            if any(name[0].isdigit() for name in identifiers):
                continue
            logger.info(f"Text2KG: {text2kg}")

            subject_uuid = register_entity(subject, subject_label)
            object_uuid = register_entity(object_ent, object_label)
            # add the relationship
            self.triplets_json["relationships"].append(
                {
                    "start_node": subject_uuid,
                    "end_node": object_uuid,
                    "type": predicate,
                    "properties": {"source": "TEXT2KG"},
                }
            )