Bases: SemanticKGConstructionBase
It queries the following inputs:
- the project description
- the content of the PDF (total text)
- the current entity type list
It then asks the LLM to generate new entity types from these inputs.
The generated entity types are merged with the current entity type list,
and the merged entity type list is written back to the ontology file in the project folder.
Source code in Docs2KG/kg_construction/semantic_kg/ontology/entity_type_llm.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
class EntityTypesLLMGenerator(SemanticKGConstructionBase):
    """
    Generate new entity types with an LLM and merge them into the ontology.

    The generator combines

    - the project description
    - the content of the pdf (total text)
    - the current entity type list

    into a single prompt and asks the LLM to propose additional entity types.
    The proposals are merged with the entity types already stored in the
    ontology JSON file, and that file is rewritten.
    """

    def __init__(
        self, project_id: str, agent_name="phi3.5", agent_type="ollama", **kwargs
    ):
        """
        Set up the LLM agent and load the inputs used to build the prompt.

        Args:
            project_id: Identifier forwarded to the base class constructor.
            agent_name: Model name handed to ``AgentManager``.
            agent_type: Backend type handed to ``AgentManager``.
            **kwargs: Extra keyword arguments forwarded to ``AgentManager``.

        Raises:
            FileNotFoundError: If the project description file does not exist.
        """
        super().__init__(project_id)
        self.ontology_agent = AgentManager(agent_name, agent_type, **kwargs)
        # first load project description
        self.project_description = self.load_project_description()
        # NOTE(review): load_entity_type is not defined in this class; it is
        # presumably inherited and populates self.entity_type_list, which
        # generate_entity_types reads -- confirm against the base class.
        self.load_entity_type()

    @staticmethod
    def load_project_description():
        """
        Read the project description text from the configured path.

        Returns:
            The full content of the file at
            ``PROJECT_CONFIG.semantic_kg.domain_description``.

        Raises:
            FileNotFoundError: If that file does not exist.
        """
        project_description_path = Path(PROJECT_CONFIG.semantic_kg.domain_description)
        if not project_description_path.exists():
            raise FileNotFoundError(
                f"Project description not found at {project_description_path}"
            )
        # Explicit encoding so the result does not depend on the platform locale.
        with open(project_description_path, "r", encoding="utf-8") as file:
            return file.read()

    def generate_entity_types(
        self,
        content: Optional[str] = None,
    ):
        """
        Ask the LLM for new entity types.

        Args:
            content: Text of the pdf to base the suggestions on. When omitted,
                an empty string is placed in the prompt instead of the literal
                text ``None``.

        Returns:
            A list of entity type names, or ``None`` when the LLM response
            could not be parsed.
        """
        pdf_content = "" if content is None else content
        prompt = f"""You are a expert to generate entity types based on the following project description:
'{self.project_description}'
and the content of the pdf:
'{pdf_content}'
The current entity types are:
{self.entity_type_list}
Please generate some new related entity types based on the information above
**mainly based on the content of pdf**
Do not generate repeated entity types.
Generated entity type should be short, concise, representative and concise.
Return in JSON format with key entity_types,
and value as a list of entity types which will be a string of the entity type, separated by comma.
If the current entity types already cover most of the entities, you can return an empty list.
"""
        response = self.ontology_agent.process_input(prompt, reset_session=True)
        res_json_str = response["response"]
        logger.debug(f"LLM response: {res_json_str}")
        new_entity_types = self.extract_entity_types(res_json_str)
        # Routine result, not an emergency: log at info rather than critical.
        logger.info(f"New entity types: {new_entity_types}")
        return new_entity_types

    @staticmethod
    def extract_entity_types(res_json_str):
        """
        Parse the entity types out of the LLM's JSON response.

        The value under the ``entity_types`` key may be either a list of
        strings or a single comma-separated string. Names are stripped of
        surrounding whitespace; empty names and repeats are dropped while the
        original order is kept.

        Args:
            res_json_str: Raw JSON text returned by the LLM.

        Returns:
            A list of entity type names (possibly empty), or ``None`` if the
            response is not valid JSON or does not have the expected shape.
        """
        try:
            res_json = json.loads(res_json_str.strip())
            raw_entity_types = res_json.get("entity_types", "")
            if not isinstance(raw_entity_types, list):
                raw_entity_types = raw_entity_types.strip().split(",")
            entity_types = []
            for raw_entity_type in raw_entity_types:
                entity_type = raw_entity_type.strip()
                # "".split(",") yields [""]; never let an empty name through.
                if entity_type and entity_type not in entity_types:
                    entity_types.append(entity_type)
            return entity_types
        except (ValueError, AttributeError, TypeError) as e:
            # ValueError covers json.JSONDecodeError; AttributeError/TypeError
            # cover a response whose structure is not the expected dict of
            # strings (e.g. a top-level list, null, or numeric items).
            logger.error(f"Failed to extract entity types from response: {str(e)}")
            return None

    def construct_ontology(self, content: Optional[str] = None):
        """
        Generate new entity types and, if there are any, persist them.

        Args:
            content: Optional pdf text forwarded to ``generate_entity_types``.
        """
        new_entity_types = self.generate_entity_types(content=content)
        if new_entity_types:
            self.update_ontology(new_entity_types)

    @staticmethod
    def update_ontology(new_entity_types):
        """
        Merge ``new_entity_types`` into the ontology JSON file on disk.

        The file at ``PROJECT_CONFIG.semantic_kg.ontology`` is loaded when it
        exists; otherwise a fresh ontology is started. The union of the stored
        and the new entity types is written back in sorted order so repeated
        runs produce a stable file.

        Args:
            new_entity_types: Iterable of entity type names to add.
        """
        ontology_json_path = Path(PROJECT_CONFIG.semantic_kg.ontology)
        if ontology_json_path.exists():
            with timer(logger, "Loading ontology json"):
                with open(ontology_json_path, "r", encoding="utf-8") as f:
                    ontology_json = json.load(f)
            logger.info(f"Ontology json: {ontology_json}")
            ontology = Ontology(**ontology_json)
        else:
            logger.warning(f"Ontology json not found at {ontology_json_path}")
            # Previously no ontology object existed on this path and the code
            # below failed with NameError.
            # NOTE(review): assumes Ontology can be constructed from
            # entity_types alone (other fields optional) -- confirm against
            # the model definition.
            ontology = Ontology(entity_types=[])
            ontology_json_path.parent.mkdir(parents=True, exist_ok=True)

        ontology.entity_types = sorted(
            set(new_entity_types) | set(ontology.entity_types)
        )
        json_str = ontology.model_dump_json()
        with open(ontology_json_path, "w", encoding="utf-8") as f:
            f.write(json_str)
|