WIP: Python prototype to feel out the parser
This commit is contained in:
2
notes.md
2
notes.md
@@ -34,4 +34,4 @@ So the first task in the parser should be to parse the block structure.
|
||||
- HTML
|
||||
- Text
|
||||
|
||||
<!--vim: ts=2 sw=2 et linebreak :-->
|
||||
<!--vim: ts=2 sw=2 et linebreak :-->
|
112
pyparse.py
Normal file
112
pyparse.py
Normal file
@@ -0,0 +1,112 @@
|
||||
import sys
|
||||
from enum import Enum, auto
|
||||
from idlelib.configdialog import is_int
|
||||
from pathlib import Path
|
||||
from pprint import pprint
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class Block:
    """Base class for one top-level block of the parsed document.

    Attributes:
        tag: Name of the concrete class of this instance, so every subclass
            is labelled automatically.
        data: The string fragments collected for this block, in order.
    """

    def __init__(self, *parts: str) -> None:
        """Create a block holding *parts* as its initial fragments."""
        self.tag = type(self).__name__
        self.data = list(parts)

    def extend(self, *parts: str) -> None:
        """Append further fragments to the end of this block."""
        self.data.extend(parts)

    def __repr__(self) -> str:
        """Return ``<tag>:: <repr of the joined fragments>``."""
        # Join outside the f-string: reusing the enclosing quote character
        # inside a replacement field is only legal from Python 3.12 onwards.
        text = ''.join(self.data)
        return f'{self.tag}:: {text!r}'
|
||||
|
||||
|
||||
class Break(Block):
    """Distinct block kind named ``Break``; adds nothing to ``Block``.

    NOTE(review): presumably meant for thematic breaks -- confirm.
    """
|
||||
|
||||
|
||||
class ATXHeading(Block):
    """Distinct block kind named ``ATXHeading``; adds nothing to ``Block``.

    NOTE(review): presumably meant for ``#``-style headings -- confirm.
    """
|
||||
|
||||
|
||||
class SetextHeading(Block):
    """Distinct block kind named ``SetextHeading``; adds nothing to ``Block``.

    NOTE(review): presumably meant for underlined headings -- confirm.
    """
|
||||
|
||||
|
||||
class IndentedChunk(Block):
    """Block kind used for runs of indented lines; adds nothing to ``Block``."""
|
||||
|
||||
|
||||
class Fence(Block):
    """Block holding the lines of a fenced section.

    Attributes:
        meta: First constructor argument, stored exactly as given.
        complete: Initialised to ``False``; nothing in this class changes it.
    """

    def __init__(self, meta, *data):
        """Create a fence labelled *meta* with *data* as initial fragments."""
        super().__init__(*data)
        self.meta = meta
        self.complete = False

    def __repr__(self):
        """Return ``<tag>:<meta>:: <repr of the joined fragments>``."""
        # Join outside the f-string: reusing the enclosing quote character
        # inside a replacement field is only legal from Python 3.12 onwards.
        text = ''.join(self.data)
        return f'{self.tag}:{self.meta}:: {text!r}'
|
||||
|
||||
|
||||
class HTML(Block):
    """Distinct block kind named ``HTML``; adds nothing to ``Block``.

    NOTE(review): presumably meant for raw HTML blocks -- confirm.
    """
|
||||
|
||||
|
||||
class Definition(Block):
    """Distinct block kind named ``Definition``; adds nothing to ``Block``.

    NOTE(review): the intended meaning of "definition" is not visible
    here -- confirm.
    """
|
||||
|
||||
|
||||
class Paragraph(Block):
    """Block kind used for runs of ordinary text lines; adds nothing to ``Block``."""
|
||||
|
||||
|
||||
class Blank(Block):
    """Block kind used for runs of whitespace-only lines; adds nothing to ``Block``."""
|
||||
|
||||
|
||||
def convert(md: str):
    """Split Markdown text into top-level blocks and pretty-print them.

    The input is walked line by line (line endings are kept).  Each line
    either continues the block before it or starts a new one:

    * ``Blank``         -- runs of whitespace-only lines.
    * ``IndentedChunk`` -- lines that begin with a space or a tab.  A single
      ``Blank`` run sitting between two indented runs is folded into the
      chunk instead of splitting it.
    * ``Fence``         -- the lines between an opening and a closing
      triple-backtick marker; the text after the opening marker is kept as
      ``meta``.  A fence that is still open when the input ends is kept
      too, with ``complete`` left at ``False``.
    * ``Paragraph``     -- every other line.

    Args:
        md: The Markdown source text.

    Returns:
        None.  The list of blocks is printed with ``pprint``.
    """
    blocks: list[Block] = []
    cur_fence: Optional[Fence] = None

    for line in md.splitlines(keepends=True):
        if cur_fence is not None:
            # Inside a fence every line is content until the closing marker.
            if line.lstrip(' ').startswith('```'):
                cur_fence.complete = True
                blocks.append(cur_fence)
                cur_fence = None
            else:
                cur_fence.extend(line)
            continue

        last = blocks[-1] if blocks else None

        if line.isspace():
            if isinstance(last, Blank):
                last.extend(line)
            else:
                blocks.append(Blank(line))

        # NOTE(review): with a one-space prefix, a fence marker preceded by
        # spaces lands here and never reaches the fence branch below.
        # Confirm the intended indent width.
        elif line.startswith((' ', '\t')):
            if isinstance(last, IndentedChunk):
                last.extend(line)
            elif (
                len(blocks) >= 2
                and isinstance(last, Blank)
                and isinstance(blocks[-2], IndentedChunk)
            ):
                # Blank lines between two indented runs belong to the chunk.
                blank = blocks.pop()
                blocks[-1].extend(*blank.data, line)
            else:
                blocks.append(IndentedChunk(line))

        elif line.lstrip(' ').startswith('```'):
            meta = line.strip().removeprefix('```')
            cur_fence = Fence(meta)

        elif isinstance(last, Paragraph):
            last.extend(line)
        else:
            blocks.append(Paragraph(line))

    if cur_fence is not None:
        # Input ended inside a fence: keep what was collected rather than
        # dropping it.  ``complete`` stays False to mark the missing closer.
        blocks.append(cur_fence)

    pprint(blocks)
|
||||
|
||||
|
||||
def main():
    """Run ``convert`` on every file named on the command line.

    Each argument in ``sys.argv[1:]`` is read as text and passed to
    ``convert``; whatever ``convert`` returns is then printed between two
    80-character rules of ``=``.
    """
    ruler = '=' * 80

    for arg in sys.argv[1:]:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale default encoding.
        md = Path(arg).read_text(encoding='utf-8')
        html = convert(md)

        print(ruler)
        print(html)
        print(ruler)
|
||||
|
||||
|
||||
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
httpx
|
||||
parsel
|
Reference in New Issue
Block a user