From 4173229b204503b0f58cfa31cab3fa6be0497039 Mon Sep 17 00:00:00 2001 From: David Allemang Date: Mon, 11 Nov 2024 11:36:37 -0500 Subject: [PATCH] initial commit; wip markdown parser --- build.zig | 49 + build.zig.zon | 10 + examples.py | 52 + notes.md | 35 + src/examples.zig | 11414 +++++++++++++++++++++++++++++++++++++++++++++ src/main.zig | 61 + 6 files changed, 11621 insertions(+) create mode 100644 build.zig create mode 100644 build.zig.zon create mode 100644 examples.py create mode 100644 notes.md create mode 100644 src/examples.zig create mode 100644 src/main.zig diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..813d69f --- /dev/null +++ b/build.zig @@ -0,0 +1,49 @@ +const std = @import("std"); + +/// Declares the "ziglue" executable together with the "run" and "test" build steps. +pub fn build(b: *std.Build) void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + const exe = b.addExecutable(.{ + .name = "ziglue", + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + + b.installArtifact(exe); + + const run_cmd = b.addRunArtifact(exe); + + run_cmd.step.dependOn(b.getInstallStep()); + + if (b.args) |args| { + run_cmd.addArgs(args); + } + + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + const exe_unit_tests = b.addTest(.{ + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + + const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); + + const example_tests = b.addTest(.{ + .root_source_file = b.path("src/examples.zig"), + .target = target, + .optimize = optimize, + }); + + // addTest only compiles the test binary; without a run artifact the + // examples are built but never executed by `zig build test`. + const run_example_tests = b.addRunArtifact(example_tests); + + const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_exe_unit_tests.step); + test_step.dependOn(&run_example_tests.step); +} diff --git a/build.zig.zon b/build.zig.zon new file mode 100644 index 0000000..23fe8b7 --- /dev/null +++ b/build.zig.zon @@ -0,0 +1,10 @@ +.{ + .name = "ziglue", + .version = "0.0.0", + + .paths = .{ + "build.zig", +
"build.zig.zon", + "src", + }, +} diff --git a/examples.py b/examples.py new file mode 100644 index 0000000..728439f --- /dev/null +++ b/examples.py @@ -0,0 +1,52 @@ +import parsel +import httpx +from pathlib import Path +from textwrap import indent + +VERSION = '0.31.2' + +CACHE = Path('.spec', VERSION) +URL = f'https://spec.commonmark.org/{VERSION}/' + +if CACHE.exists(): + sel = parsel.Selector(CACHE.read_text()) +else: + response = httpx.get('https://spec.commonmark.org/0.31.2/') + response.raise_for_status() + CACHE.parent.mkdir(parents=True, exist_ok=True) + CACHE.write_text(response.text) + sel = parsel.Selector(response.text) + +print( +'''const std = @import("std"); +const convert = @import("main.zig").convert; +''' +) + +for example in sel.css('.example'): + name = example.css('.examplenum > a::text').get() + md = example.css('.language-markdown::text').get() + html = example.css('.language-html::text').get() + + assert name is not None + + print(f''' +test "{name}" {{ + const alloc = std.testing.allocator; + + const md = ( +{indent(md, r' \\', lambda _: True) if md else '""'} +); + const html = ( +{indent(html, r' \\', lambda _: True) if html else '""'} +); + + const output = try convert(alloc, md); + defer alloc.free(output); + + try std.testing.expectEqualStrings(html, output); +}} +''') + + # mdtext = textwrap.indent("md", '\\') + diff --git a/notes.md b/notes.md new file mode 100644 index 0000000..89a6f8a --- /dev/null +++ b/notes.md @@ -0,0 +1,35 @@ +The spec is organized into "content blocks", "leaf blocks", and inline content. I should take this as a hint to do the same. + +So the first task in the parser should be to parse the block structure. + +- Blocks + - Leaf + - Thematic break + - ATX heading + - Setext heading + - Indented chunk + + Indented code block is a sequence of indented chunks. + + Preserve count of blank lines. 
+ - Fenced code block + - HTML blocks + - Link reference definition + - Paragraph + - Blank lines + + These are part of the document, but they are not rendered. + - Container + - Blockquote + - List Item + + List is a sequence of list items of the same type. + +- Inline + - Inline code + - Strong, emph + - Links + - Inline + - Reference + - Images + - Auto + - HTML + - Text + + diff --git a/src/examples.zig b/src/examples.zig new file mode 100644 index 0000000..9fdfbdf --- /dev/null +++ b/src/examples.zig @@ -0,0 +1,11414 @@ +const std = @import("std"); +const convert = @import("main.zig").convert; + +comptime { + std.testing.refAllDecls(@This()); +} + +test "Example 1" { + return error.BBBBBBBBBBBBb; +} + +test "Example 2" { + const alloc = std.testing.allocator; + + const md = ( + \\→foo→baz→→bim + ); + const html = ( + \\
foo→baz→→bim
+        \\
+ ); + + const output = try convert(alloc, md); + defer alloc.free(output); + + try std.testing.expectEqualStrings(html, output); +} + +test "Example 3" { + const alloc = std.testing.allocator; + + const md = ( + \\a→a + ); + const html = ( + \\
a→a
+        \\ὐ→a
+        \\
+ ); + + const output = try convert(alloc, md); + defer alloc.free(output); + + try std.testing.expectEqualStrings(html, output); +} + +test "Example 4" { + const alloc = std.testing.allocator; + + const md = ( + \\- + ); + const html = ( + \\ + ); + + const output = try convert(alloc, md); + defer alloc.free(output); + + try std.testing.expectEqualStrings(html, output); +} + +test "Example 5" { + const alloc = std.testing.allocator; + + const md = ( + \\- + ); + const html = ( + \\