initial commit; wip markdown parser

This commit is contained in:
2024-11-11 11:36:37 -05:00
parent 16efc2525b
commit 4173229b20
6 changed files with 11616 additions and 0 deletions

44
build.zig Normal file
View File

@@ -0,0 +1,44 @@
const std = @import("std");
/// Declares the build graph for the `ziglue` executable.
///
/// Steps exposed to `zig build`:
///   * the default install step, which installs the `ziglue` executable;
///   * `run`  - builds, installs and runs the executable, forwarding any
///              arguments given after `--`;
///   * `test` - runs the unit tests in `src/main.zig` and the tests in
///              `src/examples.zig`.
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    const exe = b.addExecutable(.{
        .name = "ziglue",
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });
    b.installArtifact(exe);

    const run_cmd = b.addRunArtifact(exe);
    // Run the installed artifact, so installation has to happen first.
    run_cmd.step.dependOn(b.getInstallStep());
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }
    const run_step = b.step("run", "Run the app");
    run_step.dependOn(&run_cmd.step);

    const exe_unit_tests = b.addTest(.{
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });
    const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);

    const example_tests = b.addTest(.{
        .root_source_file = b.path("src/examples.zig"),
        .target = target,
        .optimize = optimize,
    });
    // `addTest` only produces a compile step. Without a run artifact the
    // example tests would be built but never executed by `zig build test`.
    const run_example_tests = b.addRunArtifact(example_tests);

    const test_step = b.step("test", "Run unit tests");
    test_step.dependOn(&run_exe_unit_tests.step);
    test_step.dependOn(&run_example_tests.step);
}

10
build.zig.zon Normal file
View File

@@ -0,0 +1,10 @@
// Package manifest for the `ziglue` project.
.{
.name = "ziglue",
// Placeholder version; nothing has been released yet.
.version = "0.0.0",
// Files and directories listed as belonging to this package.
.paths = .{
"build.zig",
"build.zig.zon",
"src",
},
}

52
examples.py Normal file
View File

@@ -0,0 +1,52 @@
import parsel
import httpx
from pathlib import Path
from textwrap import indent
# Generates Zig test cases from the examples embedded in the CommonMark spec.
# The generated Zig source is written to stdout; the downloaded spec page is
# cached on disk so repeated runs do not hit the network.

VERSION = '0.31.2'
CACHE = Path('.spec', VERSION)
URL = f'https://spec.commonmark.org/{VERSION}/'
# Prefix placed in front of every line to form a Zig multiline string literal.
# Kept outside the f-string below: backslashes inside f-string expressions are
# a syntax error before Python 3.12.
ZIG_LINE_PREFIX = r' \\'


def zig_multiline(text):
    """Render `text` as the lines of a Zig multiline string literal.

    Every line (including blank ones) gets the `\\\\` line prefix. A missing
    or empty `text` is rendered as the empty Zig string literal `""`.
    """
    if not text:
        return '""'
    return indent(text, ZIG_LINE_PREFIX, lambda _: True)


if CACHE.exists():
    sel = parsel.Selector(CACHE.read_text(encoding='utf-8'))
else:
    # Fetch the page for VERSION (not a hard-coded address) so that the cache
    # path and the downloaded spec can never disagree.
    response = httpx.get(URL)
    response.raise_for_status()
    CACHE.parent.mkdir(parents=True, exist_ok=True)
    CACHE.write_text(response.text, encoding='utf-8')
    sel = parsel.Selector(response.text)

print(
    '''const std = @import("std");
const convert = @import("main.zig").convert;
'''
)

for example in sel.css('.example'):
    name = example.css('.examplenum > a::text').get()
    md = example.css('.language-markdown::text').get()
    html = example.css('.language-html::text').get()
    assert name is not None
    print(f'''
test "{name}" {{
const alloc = std.testing.allocator;
const md = (
{zig_multiline(md)}
);
const html = (
{zig_multiline(html)}
);
const output = try convert(alloc, md);
defer alloc.free(output);
try std.testing.expectEqualStrings(html, output);
}}
''')

35
notes.md Normal file
View File

@@ -0,0 +1,35 @@
The spec is organized into "container blocks", "leaf blocks", and inline content. I should take this as a hint to do the same.
So the first task in the parser should be to parse the block structure.
- Blocks
- Leaf
- Thematic break
- ATX heading
- Setext heading
- Indented chunk
+ Indented code block is a sequence of indented chunks.
+ Preserve count of blank lines.
- Fenced code block
- HTML blocks
- Link reference definition
- Paragraph
- Blank lines
+ These are part of the document, but they are not rendered.
- Container
- Blockquote
- List Item
+ List is a sequence of list items of the same type.
- Inline
- Inline code
- Strong, emph
- Links
- Inline
- Reference
- Images
- Auto
- HTML
- Text
<!--vim: ts=2 sw=2 et linebreak :-->

11414
src/examples.zig Normal file

File diff suppressed because it is too large Load Diff

61
src/main.zig Normal file
View File

@@ -0,0 +1,61 @@
const std = @import("std");
/// Splits `src` into blocks separated by blank lines and appends each block
/// to `blocks`.
///
/// A line is blank when it contains nothing but spaces, tabs and carriage
/// returns. Each block runs from the line after the previous blank line up to
/// (but not including) the newline that terminates the next blank line. Text
/// that follows the last blank line is appended as a final block.
///
/// The appended slices point into `src`; nothing is copied, so they are only
/// valid for as long as `src` is. Every block is also echoed through
/// `std.debug.print`.
///
/// Returns any error produced by `blocks.append`.
pub fn find_blocks(src: []const u8, blocks: *std.ArrayList([]const u8)) !void {
    var idx: usize = 0; // start of the line currently being examined
    var blk: usize = 0; // start of the block currently being accumulated
    while (idx < src.len) {
        const end = std.mem.indexOfScalarPos(u8, src, idx, '\n') orelse src.len;
        const line = src[idx..end];
        if (std.mem.indexOfNone(u8, line, " \t\r\n") == null) {
            // the line is blank; a block has ended.
            const block = src[blk..end];
            std.debug.print("BLOCK:\n{s}", .{block});
            try blocks.append(block);
            blk = end + 1;
        }
        idx = end + 1;
    }
    // Input that does not end with a blank line still has an open block.
    // Everything from `blk` on is non-blank lines, so flush it instead of
    // silently dropping it.
    if (blk < src.len) {
        const block = src[blk..];
        std.debug.print("BLOCK:\n{s}", .{block});
        try blocks.append(block);
    }
}
/// Placeholder for the Markdown conversion entry point.
///
/// Nothing is implemented yet: both arguments are ignored and the call
/// always fails with `error.UhOh`.
pub fn convert(alloc: std.mem.Allocator, md: []const u8) ![]const u8 {
    // Neither parameter is used yet; discard them so the stub compiles.
    _ = md;
    _ = alloc;
    return error.UhOh;
}
/// Command-line entry point: treats every argument as a file path, reads the
/// file into memory and runs `find_blocks` over its contents.
///
/// Returns any error from argument iteration, opening or reading a file, or
/// from `find_blocks`.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const alloc = gpa.allocator();

    var args = try std.process.argsWithAllocator(alloc);
    defer args.deinit();
    // Skip the program name. The result is discarded on purpose rather than
    // asserted: an empty argv just means there is nothing to process, and an
    // assertion must not carry a side effect the program depends on.
    _ = args.skip();

    var blocks = std.ArrayList([]const u8).init(alloc);
    defer blocks.deinit();

    while (args.next()) |fname| {
        const src = load: {
            const file = try std.fs.cwd().openFile(fname, .{ .mode = .read_only });
            defer file.close();
            break :load try file.readToEndAlloc(alloc, std.math.maxInt(u32));
        };
        defer alloc.free(src);
        std.log.debug("arg '{s}' {d} bytes", .{ fname, src.len });

        // The collected slices point into `src`, which is freed at the end of
        // this iteration, so drop the previous file's blocks before reuse.
        blocks.clearRetainingCapacity();
        try find_blocks(src, &blocks);
        // TODO: parse the collected blocks once the parser exists.
    }
}