# Recovered from the patch "wip python prototype to feel out the parser"
# (commit 3238162, David Allemang, 2024-11-13), which creates pyparse.py.
# The same patch also adds requirements.txt (httpx, parsel; neither is
# imported here) and makes a one-line change at the end of notes.md.
"""Prototype Markdown block parser.

Splits a Markdown document into a flat list of top-level blocks (blank runs,
indented chunks, fenced code and paragraphs). No HTML is generated yet.
"""

import sys
from enum import Enum, auto  # not used yet; kept from the original import list
from pathlib import Path
from pprint import pprint
from typing import Optional

# Written as a product so the width survives whitespace normalisation.
_CODE_INDENT = ' ' * 4


class Block:
    """A top-level block: a tag (the class name) plus the raw lines it covers."""

    def __init__(self, *parts):
        self.tag = type(self).__name__
        self.data = list(parts)

    def extend(self, *parts):
        """Append more raw source lines to this block."""
        self.data.extend(parts)

    def __repr__(self):
        # Join first: reusing the outer quote inside an f-string replacement
        # field is a syntax error before Python 3.12.
        text = ''.join(self.data)
        return f'{self.tag}:: {text!r}'


class Break(Block):
    """Placeholder block type; not produced by convert() yet."""


class ATXHeading(Block):
    """Placeholder block type; not produced by convert() yet."""


class SetextHeading(Block):
    """Placeholder block type; not produced by convert() yet."""


class IndentedChunk(Block):
    """Run of indented lines, including blank lines between indented lines."""


class Fence(Block):
    """Fenced block; ``meta`` is the text that follows the opening marker."""

    def __init__(self, meta, *data):
        super().__init__(*data)
        self.meta = meta
        # True once the closing marker has been seen.
        self.complete = False

    def __repr__(self):
        text = ''.join(self.data)
        return f'{self.tag}:{self.meta}:: {text!r}'


class HTML(Block):
    """Placeholder block type; not produced by convert() yet."""


class Definition(Block):
    """Placeholder block type; not produced by convert() yet."""


class Paragraph(Block):
    """Run of consecutive lines that match no other block type."""


class Blank(Block):
    """Run of consecutive whitespace-only lines."""


def convert(md: str) -> list[Block]:
    """Split Markdown source into a list of top-level blocks.

    Args:
        md: The Markdown text. Line endings are preserved in the block data.

    Returns:
        The blocks in document order. A fence that is still open at the end
        of the input is included with ``complete`` left as ``False``.
    """
    blocks: list[Block] = []
    cur_fence: Optional[Fence] = None

    for line in md.splitlines(keepends=True):
        is_fence_marker = line.lstrip(' ').startswith('```')

        # Inside a fence every line is content until the next marker line.
        if cur_fence is not None:
            if is_fence_marker:
                cur_fence.complete = True
                blocks.append(cur_fence)
                cur_fence = None
            else:
                cur_fence.extend(line)
            continue

        last = blocks[-1] if blocks else None

        if line.isspace():
            if isinstance(last, Blank):
                last.extend(line)
            else:
                blocks.append(Blank(line))

        elif line.startswith((_CODE_INDENT, '\t')):
            if isinstance(last, IndentedChunk):
                last.extend(line)
            elif (
                isinstance(last, Blank)
                and len(blocks) >= 2
                and isinstance(blocks[-2], IndentedChunk)
            ):
                # Blank lines between two indented lines belong to the chunk:
                # fold the Blank block into it instead of splitting the chunk.
                blocks[-2].extend(*last.data, line)
                blocks.pop()
            else:
                blocks.append(IndentedChunk(line))

        elif is_fence_marker:
            cur_fence = Fence(line.strip().removeprefix('```'))

        elif isinstance(last, Paragraph):
            last.extend(line)

        else:
            blocks.append(Paragraph(line))

    # Keep the content of a fence that never closed rather than dropping it.
    if cur_fence is not None:
        blocks.append(cur_fence)

    return blocks


def main():
    """Parse every file named on the command line and print its blocks."""
    for arg in sys.argv[1:]:
        md = Path(arg).read_text(encoding='utf-8')
        blocks = convert(md)

        print('=' * 80)
        pprint(blocks)
        print('=' * 80)


if __name__ == '__main__':
    main()