Skip to content

Web2markdown

Web2Markdown

Bases: WebParserBase

Source code in Docs2KG/parser/web/web2markdown.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
class Web2Markdown(WebParserBase):
    """
    Convert the HTML page stored in the output directory to markdown.

    The markdown is written to a ``markdowns`` sub-directory of
    ``self.output_dir``, which is created on initialisation.
    """

    def __init__(self, *args, **kwargs):
        """
        Initialise the parser and make sure the markdown directory exists.

        All positional and keyword arguments are forwarded unchanged to
        ``WebParserBase``.
        """
        super().__init__(*args, **kwargs)
        # NOTE(review): self.output_dir is presumably set by
        # WebParserBase.__init__ as a pathlib.Path — confirm.
        self.markdown_output_dir = self.output_dir / "markdowns"
        self.markdown_output_dir.mkdir(parents=True, exist_ok=True)

    def convert2markdown(self) -> None:
        """
        Convert the HTML file to markdown and save it to the output directory

        Reads ``index.html`` from ``self.output_dir``, passes its content to
        ``md`` and writes the result to ``content.md`` inside
        ``self.markdown_output_dir``.

        Raises:
            FileNotFoundError: if ``index.html`` does not exist.
            UnicodeDecodeError: if ``index.html`` is not valid UTF-8.
        """
        html_path = self.output_dir / "index.html"
        markdown_path = self.markdown_output_dir / "content.md"

        # Pin the encoding: without it open() falls back to the platform
        # locale, which breaks on non-UTF-8 systems for pages with
        # non-ASCII characters.
        with open(html_path, "r", encoding="utf-8") as f:
            html_content = f.read()

        markdown_file = md(html_content)

        with open(markdown_path, "w", encoding="utf-8") as f:
            f.write(str(markdown_file))

convert2markdown()

Convert the HTML file (`index.html` in the output directory) to Markdown and save it as `markdowns/content.md` in the output directory.

Source code in Docs2KG/parser/web/web2markdown.py
15
16
17
18
19
20
21
22
23
24
def convert2markdown(self) -> None:
    """
    Convert the HTML file to markdown and save it to the output directory

    Reads ``index.html`` from ``self.output_dir``, passes its content to
    ``md`` and writes the result to ``content.md`` inside
    ``self.markdown_output_dir``.

    Raises:
        FileNotFoundError: if ``index.html`` does not exist.
        UnicodeDecodeError: if ``index.html`` is not valid UTF-8.
    """
    html_path = self.output_dir / "index.html"
    markdown_path = self.markdown_output_dir / "content.md"

    # Pin the encoding: without it open() falls back to the platform
    # locale, which breaks on non-UTF-8 systems for pages with
    # non-ASCII characters.
    with open(html_path, "r", encoding="utf-8") as f:
        html_content = f.read()

    markdown_file = md(html_content)

    with open(markdown_path, "w", encoding="utf-8") as f:
        f.write(str(markdown_file))