Skip to content

Base

JSONEncoder

Bases: JSONEncoder

Custom JSON encoder to handle numpy types and other special objects

Source code in Docs2KG/kg_construction/base.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
class JSONEncoder(json.JSONEncoder):
    """JSON encoder that also understands numpy values, paths and objects
    exposing ``to_json`` / ``to_dict``."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Called by the base encoder for objects it cannot serialize itself.
        Conversions are tried in a fixed order; anything unmatched is handed
        back to ``json.JSONEncoder.default``.
        """
        # Ordered (type, converter) pairs; the first matching type wins.
        conversions = (
            (np.integer, int),
            (np.floating, float),
            (np.ndarray, lambda array: array.tolist()),
            (np.bool_, bool),
            (Path, str),
        )
        for kind, convert in conversions:
            if isinstance(obj, kind):
                return convert(obj)

        # Objects that can describe themselves: ``to_json`` is preferred
        # over ``to_dict`` when both exist.
        for hook_name in ("to_json", "to_dict"):
            if hasattr(obj, hook_name):
                return getattr(obj, hook_name)()

        return super().default(obj)

KGConstructionBase

Source code in Docs2KG/kg_construction/base.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
class KGConstructionBase:
    """Base class for knowledge-graph construction steps bound to one project.

    On creation it prepares the project's output folder and a ``layout``
    sub-folder, and it provides a JSON export helper. ``construct`` is a
    placeholder that raises ``NotImplementedError`` until overridden.
    """

    def __init__(self, project_id: str):
        """
        Set up the output folders for a project.

        Args:
            project_id: Identifier used as the name of the project folder
                under ``<output_dir>/projects``.
        """
        self.project_id = project_id
        # create and set the project folder
        self.project_folder = PROJECT_CONFIG.data.output_dir / "projects" / project_id
        self.project_folder.mkdir(parents=True, exist_ok=True)

        # create a sub folder for layout kg
        layout_folder = self.project_folder / "layout"
        layout_folder.mkdir(parents=True, exist_ok=True)
        self.layout_folder = layout_folder
        self.entity_type_list = []

    def construct(self, docs):
        """Build the knowledge graph from ``docs``; must be overridden.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    def export_json(
        self, data: Any, filename: Union[str, Path], ensure_ascii: bool = False
    ) -> Path:
        """
        Export data to a JSON file with improved type handling.

        The data is serialized completely in memory before the target file is
        opened, so a serialization failure never truncates an existing file
        or leaves a partially written one behind.

        Args:
            data: The data to export
            filename: Name of the output file, resolved relative to the
                project folder; ``.json`` is appended when missing
            ensure_ascii: If False, allow non-ASCII characters in output

        Returns:
            Path: Path to the exported file

        Raises:
            IOError: If there's an error writing the file
            TypeError: If an object type cannot be serialized
        """
        try:
            # Ensure filename has .json extension
            if not str(filename).endswith(".json"):
                filename = str(filename) + ".json"

            output_path = self.project_folder / filename

            # Create the directory that will actually contain the file. For a
            # plain file name this is the project folder itself; for a name
            # with sub-directories it also covers those.
            output_path.parent.mkdir(parents=True, exist_ok=True)

            # Serialize first: if this raises TypeError, nothing on disk has
            # been touched yet.
            payload = json.dumps(
                data, cls=JSONEncoder, ensure_ascii=ensure_ascii, indent=4
            )

            with open(output_path, "w", encoding="utf-8") as f:
                f.write(payload)

            logger.info(f"Successfully exported {filename} to {self.project_folder}")
            return output_path

        except IOError as e:
            logger.error(f"Failed to write file {filename}: {str(e)}")
            raise

        except TypeError as e:
            logger.error(f"Serialization error for {filename}: {str(e)}")
            raise

export_json(data, filename, ensure_ascii=False)

Export data to a JSON file with improved type handling.

Parameters:

Name Type Description Default
data Any

The data to export

required
filename Union[str, Path]

Name of the output file

required
ensure_ascii bool

If False, allow non-ASCII characters in output

False

Returns:

Name Type Description
Path Path

Path to the exported file

Raises:

Type Description
IOError

If there's an error writing the file

TypeError

If an object type cannot be serialized

Source code in Docs2KG/kg_construction/base.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def export_json(
    self, data: Any, filename: Union[str, Path], ensure_ascii: bool = False
) -> Path:
    """
    Export data to a JSON file with improved type handling.

    The data is serialized completely in memory before the target file is
    opened, so a serialization failure never truncates an existing file or
    leaves a partially written one behind.

    Args:
        data: The data to export
        filename: Name of the output file, resolved relative to the project
            folder; ``.json`` is appended when missing
        ensure_ascii: If False, allow non-ASCII characters in output

    Returns:
        Path: Path to the exported file

    Raises:
        IOError: If there's an error writing the file
        TypeError: If an object type cannot be serialized
    """
    try:
        # Ensure filename has .json extension
        if not str(filename).endswith(".json"):
            filename = str(filename) + ".json"

        output_path = self.project_folder / filename

        # Create the directory that will actually contain the file. For a
        # plain file name this is the project folder itself; for a name with
        # sub-directories it also covers those.
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Serialize first: if this raises TypeError, nothing on disk has been
        # touched yet.
        payload = json.dumps(
            data, cls=JSONEncoder, ensure_ascii=ensure_ascii, indent=4
        )

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(payload)

        logger.info(f"Successfully exported {filename} to {self.project_folder}")
        return output_path

    except IOError as e:
        logger.error(f"Failed to write file {filename}: {str(e)}")
        raise

    except TypeError as e:
        logger.error(f"Serialization error for {filename}: {str(e)}")
        raise