From 336a7931494b149298ec0d215bf643c4ca07a712 Mon Sep 17 00:00:00 2001 From: pommicket Date: Wed, 1 Sep 2021 18:27:51 -0400 Subject: markdown to HTML converter --- .gitignore | 1 + 00/Makefile | 4 +- 00/README.md | 6 +- 01/Makefile | 4 +- 01/README.md | 2 +- Makefile | 7 ++ README.md | 15 +++-- bootstrap.sh | 2 + markdown.c | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 233 insertions(+), 15 deletions(-) create mode 100644 Makefile create mode 100644 markdown.c diff --git a/.gitignore b/.gitignore index aaefded..d1648a7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ README.html out?? +markdown diff --git a/00/Makefile b/00/Makefile index fa229a2..a328fee 100644 --- a/00/Makefile +++ b/00/Makefile @@ -1,5 +1,5 @@ all: README.html out00 -%.html: %.md - markdown $< > $@ out00: in00 ./hexcompile +%.html: %.md ../markdown + ../markdown $< diff --git a/00/README.md b/00/README.md index 25dc1ae..a8d4c38 100644 --- a/00/README.md +++ b/00/README.md @@ -1,7 +1,7 @@ # stage 00 This directory contains the file `hexcompile`, a handwritten executable. It -takes input file `in00` containing space/newline/[any character]-separated +takes input file `in00` containing space/newline/(any character)-separated hexadecimal digit pairs (e.g. `3f`) and outputs them as bytes to the file `out00`. On 64-bit Linux, try running `./hexcompile` from this directory (I've already provided an `in00` file, which you can take a look at), and you will get @@ -369,7 +369,7 @@ That's quite a lot to take in for such a simple program, but here we are! We now have something that will let us write individual bytes with an ordinary text editor and get them translated into a binary file. -## Limitations +## limitations There are many ways in which this is a bad program. It will *only* properly handle lowercase hexadecimal digit pairs, separated by exactly one character, @@ -381,7 +381,7 @@ Also, we only read in data *three bytes at a time*, and output one byte at a time. This is a very bad idea because syscalls (e.g. `read`) are slow. `read` might take ~3 microseconds, which doesn't sound like a lot, but it means that if we used code like this to process a 50 megabyte file, say, we'd be waiting for -a long time. +a while. But these problems aren't really a big deal. We'll only be running this on little programs and we'll be sure to check that our input is in the right diff --git a/01/Makefile b/01/Makefile index aa48d00..5dde439 100644 --- a/01/Makefile +++ b/01/Makefile @@ -3,5 +3,5 @@ out01: in01 out00 ./out00 out00: in00 ../00/hexcompile -%.html: %.md - markdown $< > $@ +%.html: %.md ../markdown + ../markdown $< diff --git a/01/README.md b/01/README.md index 96de656..859996f 100644 --- a/01/README.md +++ b/01/README.md @@ -333,7 +333,7 @@ header. But by a lucky coincidence, all those entries actually land on 0 bytes, so they'll just be treated as unrecognized (as they should be). So it's all good. -## Limitations +## limitations Like our last program, this one will be slow for large files. Again, that isn't much of a problem for us. Also, if you forget a `;` at the end of a file, it'll diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1ed93a0 --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +all: markdown README.html + $(MAKE) -C 00 + $(MAKE) -C 01 +markdown: markdown.c + $(CC) -O2 -o markdown -Wall -Wconversion -Wshadow -std=c89 markdown.c +README.html: markdown README.md + ./markdown README.md diff --git a/README.md b/README.md index 8eaea4b..ca14543 100644 --- a/README.md +++ b/README.md @@ -11,14 +11,15 @@ executable, and the last one will be a C compiler. Each directory has its own README explaining what's going on. You can run `bootstrap.sh` to run through and test every stage. +To get HTML versions of all README pages, run `make`. ## the basics In this series, I want to explain *everything* that's going on. I'm going to -need to assume some passing knowledge about computers, but here's a quick -overview of what you'll want to know before starting. I can't explain everything -so you may need to do your own research. You don't need to understand each of -these in full, just get a general idea at least: +need to assume some passing knowledge, so here's a quick overview of what you'll +want to know before starting. I can't explain everything so you may need to do +your own research. You don't need to understand each of these in full, just get +a general idea at least: - what an operating system is - what memory is @@ -59,8 +60,8 @@ not right away. Bootstrapping a compiler is not an easy task, so we're trying to make it as easy as possible. We don't even necessarily need a standard-compliant C compiler, we -only need enough to compile someone else's C compiler, specifically TCC -(https://bellard.org/tcc/) since that's a compiler with very few dependencies. +only need enough to compile someone else's C compiler, specifically we'll be +using [TCC](https://bellard.org/tcc/) since it's written in standard C89. - efficiency is not a concern @@ -71,7 +72,7 @@ with itself, we'll get the same executable either way. ## reflections on trusting trust In 1984, Ken Thompson wrote the well-known article -[*Reflections on Trusting Trust*](http://users.ece.cmu.edu/~ganger/712.fall02/papers/p761-thompson.pdf). +[Reflections on Trusting Trust](http://users.ece.cmu.edu/~ganger/712.fall02/papers/p761-thompson.pdf). This is one of the things that inspired me to start this project. To summarize the article: it is possible to create a malicious C compiler which will replicate its own malicious functionalities (e.g. detecting password-checking diff --git a/bootstrap.sh b/bootstrap.sh index 7a374d0..7025b8c 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -26,6 +26,7 @@ else exit 1 fi +echo 'Processing stage 00...' cd 00 rm -f out00 make -s out00 @@ -36,6 +37,7 @@ fi rm -f out00 cd .. +echo 'Processing stage 01...' cd 01 rm -f out0[01] make -s out01 diff --git a/markdown.c b/markdown.c new file mode 100644 index 0000000..e7fed5d --- /dev/null +++ b/markdown.c @@ -0,0 +1,207 @@ +/* +a little program to convert markdown to html, for READMEs +I was using markdown.pl but that has some annoying problems +This doesn't support all of markdown; I'll add more as I need it. +*/ + +#include +#include +#include + +/* output text with *s for italics and stuff */ +static void output_md_text(FILE *out, int *flags, int line_number, const char *text) { + enum { + FLAG_I = 0x01, /* italics */ + FLAG_B = 0x02, + FLAG_CODE = 0x04 + }; + const char *p; + + for (p = text; *p; ++p) { + if ((*flags & FLAG_CODE) && *p != '`') { + putc(*p, out); + continue; + } + switch (*p) { + case '*': + if (p[1] == '*') { + /* bold */ + if (*flags & FLAG_B) { + fprintf(out, ""); + *flags &= ~FLAG_B; + } else { + fprintf(out, ""); + *flags |= FLAG_B; + } + ++p; + } else { + /* italics */ + if (*flags & FLAG_I) { + fprintf(out, ""); + *flags &= ~FLAG_I; + } else { + fprintf(out, ""); + *flags |= FLAG_I; + } + } + break; + case '`': + /* code */ + if (*flags & FLAG_CODE) { + fprintf(out, ""); + *flags &= ~FLAG_CODE; + } else { + fprintf(out, ""); + *flags |= FLAG_CODE; + } + break; + case '[': { + /* link */ + const char *label, *url, *label_end, *url_end; + int n_label, n_url; + + label = p+1; + label_end = strchr(label, ']'); + if (!label_end) { + fprintf(stderr, "line %d: Unterminated link.\n", line_number); + exit(-1); + } + if (label_end[1] != '(') { + fprintf(stderr, "line %d: Bad link syntax.\n", line_number); + exit(-1); + } + url = label_end + 2; + url_end = strchr(url, ')'); + if (!url_end) { + fprintf(stderr, "line %d: Unterminated URL.\n", line_number); + exit(-1); + } + + n_label = (int)(label_end - label); + n_url = (int)(url_end - url); + fprintf(out, "%.*s", + n_url, url, n_label, label); + p = url_end; + } break; + case '-': + if (p[1] == '-') { + /* em dash */ + fprintf(out, "—"); + ++p; + } else { + goto default_case; + } + break; + default: + default_case: + putc(*p, out); + break; + } + } +} + +int main(int argc, char **argv) { + FILE *in, *out; + char line[1024] = {0}; + char title[256] = {0}; + int flags = 0, txtflags = 0; + int line_number = 0; + enum { + FLAG_UL = 1 + }; + + if (argc < 2) { + fprintf(stderr, "Please provide an input file.\n"); + return -1; + } + + { + const char *in_filename = argv[1]; + char out_filename[256] = {0}; + char *dot; + strncpy(out_filename, argv[1], 200); + dot = strrchr(out_filename, '.'); + if (!dot || strcmp(dot, ".md") != 0) { + fprintf(stderr, "Input filename does not end in .md\n"); + return -1; + } + *dot = '\0'; + strcpy(title, out_filename); + strcpy(dot, ".html"); + + + in = fopen(in_filename, "rb"); + out = fopen(out_filename, "wb"); + } + + if (!in) { + perror("Couldn't open input file"); + return -1; + } + if (!out) { + perror("Couldn't open output file"); + return -1; + } + + fprintf(out, + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "%s\n" + "\n" + "\n" + "

\n", title + ); + while (fgets(line, sizeof line, in)) { + ++line_number; + line[strcspn(line, "\r\n")] = '\0'; + + if (line[0] == '#') { + /* heading */ + int n = 1; + while (line[n] == '#') ++n; + fprintf(out, "

", n); + output_md_text(out, &txtflags, line_number, line + n); + fprintf(out, "

\n", n); + } else if (line[0] == '\0') { + if (flags & FLAG_UL) { + fprintf(out, "\n"); + flags &= ~FLAG_UL; + } + fprintf(out, "

\n

\n"); + } else if (strncmp(line, "- ", 2) == 0) { + /* bullet */ + if (flags & FLAG_UL) { + fprintf(out, "

  • "); + } else { + fprintf(out, "
    • "); + flags |= FLAG_UL; + } + output_md_text(out, &txtflags, line_number, line + 2); + fprintf(out, "\n"); + } else if (strncmp(line, "```", 3) == 0) { + fprintf(out, "
      \n");
      +			
      +			while (fgets(line, sizeof line, in)) {
      +				++line_number;
      +				if (strncmp(line, "```", 3) == 0)
      +					break;
      +				fprintf(out, "%s", line);
      +			}
      +
      +			fprintf(out, "
      \n"); + } else { + output_md_text(out, &txtflags, line_number, line); + fprintf(out, "\n"); + } + + + + } + fprintf(out, "

      \n\n\n"); +} -- cgit v1.2.3