Skip to content

Base

ExcelParseBase

Source code in Docs2KG/parser/excel/base.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
class ExcelParseBase:
    """
    Base class for Excel parsing.

    On construction it resolves the output directory for the given workbook
    and exports the workbook's document properties to ``metadata.json``
    inside that directory.
    """

    def __init__(self, excel_file: Path, output_dir: Path = None):
        """
        Initialize the ExcelParseBase class

        Resolves (and creates, if needed) the output directory, then reads the
        workbook's document properties with openpyxl and writes them as JSON
        to ``<output_dir>/metadata.json``.

        Args:
            excel_file (Path): Path to the excel file
            output_dir (Path): Path to the output directory where the converted files will be saved.
                When omitted, ``DATA_OUTPUT_DIR / <excel file name>`` is used.

        Note:
            Errors raised while loading the workbook or writing the metadata
            file are not caught here and propagate to the caller.
        """
        # Normalise to Path so plain string paths work too (`.name` and `/`
        # below require a Path).
        self.excel_file = Path(excel_file)

        if output_dir is None:
            output_dir = DATA_OUTPUT_DIR / self.excel_file.name
        self.output_dir = Path(output_dir)
        # Create the directory in both cases: a caller-supplied directory that
        # does not exist yet would otherwise make the metadata write fail.
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # export excel metadata
        self.metadata = self.output_dir / "metadata.json"
        # Imported lazily so openpyxl is only needed when a parser is built.
        from openpyxl import load_workbook

        wb = load_workbook(self.excel_file)
        properties = wb.properties

        def _iso(value):
            # Date properties may be unset; keep those as None in the JSON.
            return value.isoformat() if value else None

        metadata_dict = {
            "filename": self.excel_file.name,
            "title": properties.title,
            "subject": properties.subject,
            "creator": properties.creator,
            "keywords": properties.keywords,
            "description": properties.description,
            "lastModifiedBy": properties.lastModifiedBy,
            "revision": properties.revision,
            "created": _iso(properties.created),
            "modified": _iso(properties.modified),
            "lastPrinted": _iso(properties.lastPrinted),
            "category": properties.category,
            "contentStatus": properties.contentStatus,
            "identifier": properties.identifier,
            "language": properties.language,
            "version": properties.version,
        }

        metadata_json = json.dumps(metadata_dict, indent=4)

        # Explicit encoding so the file does not depend on the platform locale.
        with open(self.metadata, "w", encoding="utf-8") as f:
            f.write(metadata_json)

__init__(excel_file, output_dir=None)

Initialize the ExcelParseBase class

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `excel_file` | `Path` | Path to the Excel file | required |
| `output_dir` | `Path` | Path to the output directory where the converted files will be saved | `None` |
Source code in Docs2KG/parser/excel/base.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def __init__(self, excel_file: Path, output_dir: Path = None):
    """
    Initialize the ExcelParseBase class

    Resolves (and creates, if needed) the output directory, then reads the
    workbook's document properties with openpyxl and writes them as JSON
    to ``<output_dir>/metadata.json``.

    Args:
        excel_file (Path): Path to the excel file
        output_dir (Path): Path to the output directory where the converted files will be saved.
            When omitted, ``DATA_OUTPUT_DIR / <excel file name>`` is used.

    Note:
        Errors raised while loading the workbook or writing the metadata
        file are not caught here and propagate to the caller.
    """
    # Normalise to Path so plain string paths work too (`.name` and `/`
    # below require a Path).
    self.excel_file = Path(excel_file)

    if output_dir is None:
        output_dir = DATA_OUTPUT_DIR / self.excel_file.name
    self.output_dir = Path(output_dir)
    # Create the directory in both cases: a caller-supplied directory that
    # does not exist yet would otherwise make the metadata write fail.
    self.output_dir.mkdir(parents=True, exist_ok=True)

    # export excel metadata
    self.metadata = self.output_dir / "metadata.json"
    # Imported lazily so openpyxl is only needed when a parser is built.
    from openpyxl import load_workbook

    wb = load_workbook(self.excel_file)
    properties = wb.properties

    def _iso(value):
        # Date properties may be unset; keep those as None in the JSON.
        return value.isoformat() if value else None

    metadata_dict = {
        "filename": self.excel_file.name,
        "title": properties.title,
        "subject": properties.subject,
        "creator": properties.creator,
        "keywords": properties.keywords,
        "description": properties.description,
        "lastModifiedBy": properties.lastModifiedBy,
        "revision": properties.revision,
        "created": _iso(properties.created),
        "modified": _iso(properties.modified),
        "lastPrinted": _iso(properties.lastPrinted),
        "category": properties.category,
        "contentStatus": properties.contentStatus,
        "identifier": properties.identifier,
        "language": properties.language,
        "version": properties.version,
    }

    metadata_json = json.dumps(metadata_dict, indent=4)

    # Explicit encoding so the file does not depend on the platform locale.
    with open(self.metadata, "w", encoding="utf-8") as f:
        f.write(metadata_json)