squeeze

A static site generator that can put the toothpaste back in the tube.
git clone https://git.mulligrubs.me/squeeze
Log | Files | Refs | README | LICENSE

commit 654fec5bf26699a16a4bd9bb7d1eec27eee09bf5
parent c32e5274505effc303bd2e2381625f1c5ba0ec14
Author: St John Karp <stjohn@fuzzjunket.com>
Date:   Sun, 14 Jul 2019 16:04:01 -0500

Comment and document code

Wrote a readme file, and added comments throughout the code.

Diffstat:
D entries.pl | 38 --------------------------------------
A generate_rss.pl | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M helpers.pl | 13 ++++++++++++-
M html.pl | 6 ++++++
M markdown.pl | 7 ++++++-
A parse_entry.pl | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A readme.md | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
M rss.pl | 55 ++++++-------------------------------------------------
M tastic.sh | 28 ++++++++++++++--------------
9 files changed, 226 insertions(+), 103 deletions(-)

diff --git a/entries.pl b/entries.pl @@ -1,37 +0,0 @@ -:- include('helpers.pl'). -:- include('html.pl'). -:- include('markdown.pl'). - -parse_entry:- - read_file(user_input, HTML), - parse_html(HTML). - -parse_entry(Filename):- - open(Filename, read, Stream), - read_file(Stream, HTML), - close(Stream), - parse_html(HTML). - -parse_html(HTML):- - page(EntryCodes, Title, Subtitle, Date, HTML, []), - markdown(EntryCodes, Title, Subtitle, Date, MarkdownCodes, []), - atom_codes(Markdown, MarkdownCodes), - write(Markdown), - halt. - -generate_entry:- - read_file(user_input, Entry), - generate_html(Entry). - -generate_entry(Filename):- - open(Filename, read, Stream), - read_file(Stream, Entry), - close(Stream), - generate_html(Entry). - -generate_html(Markdown):- - markdown(EntryCodes, Title, Subtitle, Date, Markdown, []), - page(EntryCodes, Title, Subtitle, Date, HTMLCodes, []), - atom_codes(HTML, HTMLCodes), - write(HTML), - halt.- \ No newline at end of file diff --git a/generate_rss.pl b/generate_rss.pl @@ -0,0 +1,68 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% File: generate_rss.pl +% Description: Predicates to generate an RSS file. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +:- include('helpers.pl'). +:- include('markdown.pl'). +:- include('rss.pl'). + +% generate_rss(+BuildDate, +Filenames). +% BuildDate is a list of character codes representing today's date (e.g. "2109-07-14"). +% Filenames is a list of atoms containing paths to all Markdown files with a date. +% These files will be read, sorted by date, and used to generate an RSS of the most +% recent posts. +generate_rss(BuildDate, Filenames):- + % Read in all the files so we have their dates and contents. + files_to_articles(Filenames, Articles), + % Sort articles by date. + sort(Articles, SortedArticles), + % Grab the most recent 5. + take_last(5, SortedArticles, TakenArticles), + % Convert to RSS and write to stdout. 
+ rss(BuildDate, TakenArticles, RSSCodes, []), + atom_codes(RSS, RSSCodes), + write(RSS), + halt. + + +% files_to_articles(+Filenames, -Articles). +% Read in each file as an article predicate. +files_to_articles([], []). + +files_to_articles([Filename|Filenames], [article(Date, Title, Link, Description)|Articles]):- + open(Filename, read, Stream), + read_file(Stream, Markdown), + close(Stream), + % Grab the link. + get_link(Filename, Link), + % Extract the title, entry, etc. from the Markdown. + markdown(Entry, Title, _, Date, Markdown, []), + % XML escape the description. + replace("&", "&amp;", Entry, EntryAmp), + replace("<", "&lt;", EntryAmp, EntryLT), + replace(">", "&gt;", EntryLT, Description), + files_to_articles(Filenames, Articles). + + +% get_link(?Filename, ?Link). +% Calculate a file's URL, given its current path. +get_link(Filename, Link):- + atom_codes(Filename, FilenameCodes), + % Just assert that this is an index file before we go further. + % Backtracking after this point will take us down a rabbit hole. + append(_, "index.md", FilenameCodes), + site_url(URL, []), + append(_, "/source", StartPath), + append(StartPath, Path, FilenameCodes), + append(PathWithoutFile, "index.md", Path), + append(URL, PathWithoutFile, Link). + +get_link(Filename, Link):- + atom_codes(Filename, FilenameCodes), + site_url(URL, []), + append(_, "/source", StartPath), + append(StartPath, Path, FilenameCodes), + append(PathWithoutExtension, ".md", Path), + append(PathWithoutExtension, "/", PathWithSlash), + append(URL, PathWithSlash, Link).+ \ No newline at end of file diff --git a/helpers.pl b/helpers.pl @@ -1,5 +1,10 @@ -% Helpers +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% File: helpers.pl +% Description: Misc. utility predicates. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% read_file(+Stream, -Codes). +% Read a file to a list of character codes. read_file(Stream, []):- at_end_of_stream(Stream). @@ -9,6 +14,8 @@ read_file(Stream, [Code|Rest]):- read_file(Stream, Rest). 
+% take_last(+Max, +List, -Results). +% Return the last Max elements of List. take_last(_, [], []). take_last(Max, [First|Rest], Result):- @@ -20,6 +27,10 @@ take_append(Max, _, ResultSoFar, ResultSoFar):- take_append(_, Item, ResultSoFar, [Item|ResultSoFar]). + +% replace(+FindCodes, +ReplaceCodes, +Haystack, -Result). +% Find instances of FindCodes in Haystack and replace with ReplaceCodes. +% All four arguments are lists of character codes. replace(_, _, [], []). replace(FindCodes, ReplaceCodes, Haystack, Result):- diff --git a/html.pl b/html.pl @@ -1,3 +1,9 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% File: html.pl +% Description: DCG definition of an HTML file. +% This is basically your static website's template. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + page(Entry, Title, Subtitle, Date) --> doctype, whitespace, diff --git a/markdown.pl b/markdown.pl @@ -1,4 +1,9 @@ -% Markdown definition +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% File: markdown.pl +% Description: DCG definition of a Markdown file. +% Markdown files may have no metadata at the start, +% or they may have a Title, Subtitle, and Date (all optional, but in that order). +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% markdown(Entry, Title, Subtitle, Date) --> metadata("Title", Title), diff --git a/parse_entry.pl b/parse_entry.pl @@ -0,0 +1,57 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% File: parse_entry.pl +% Description: Predicates to generate and parse a static site's Markdown/HTML. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +:- include('helpers.pl'). +:- include('html.pl'). +:- include('markdown.pl'). + +% parse_entry. +% Read in an HTML file from stdin. +parse_entry:- + read_file(user_input, HTML), + parse_html(HTML). + +% parse_entry(+Filename). +% Read in an HTML file from Filename. +parse_entry(Filename):- + open(Filename, read, Stream), + read_file(Stream, HTML), + close(Stream), + parse_html(HTML). + + +% parse_html(+HTML). +% Parse HTML into a Markdown file and write to stdout. 
+parse_html(HTML):- + page(EntryCodes, Title, Subtitle, Date, HTML, []), + markdown(EntryCodes, Title, Subtitle, Date, MarkdownCodes, []), + atom_codes(Markdown, MarkdownCodes), + write(Markdown), + halt. + + +% generate_entry. +% Read in a Markdown file from stdin. +generate_entry:- + read_file(user_input, Entry), + generate_html(Entry). + +% generate_entry(Filename). +% Read in a Markdown file from Filename. +generate_entry(Filename):- + open(Filename, read, Stream), + read_file(Stream, Entry), + close(Stream), + generate_html(Entry). + + +% generate_html(Markdown). +% Parse Markdown into an HTML file and write to stdout. +generate_html(Markdown):- + markdown(EntryCodes, Title, Subtitle, Date, Markdown, []), + page(EntryCodes, Title, Subtitle, Date, HTMLCodes, []), + atom_codes(HTML, HTMLCodes), + write(HTML), + halt.+ \ No newline at end of file diff --git a/readme.md b/readme.md @@ -0,0 +1,54 @@ +# Tastic + +A static site generator in Prolog (mostly). + +## What is this? + +A few months ago I lost the source files I used to generate my static website. Fortunately there was no irreparable data loss because I still had the generated site up on my server. The problem was now I needed to write a script that would extract all the articles into source files again, and then reconfigure the site generator. Then I went, "Oh. This is a Prolog problem." I figured if I could write a Prolog program that described my HTML template then I could use the same code both to un-generate and re-generate the website, because a Prolog program is basically a set of rules and the logic can be run in either direction. (But then I love Prolog so every problem is a Prolog problem but I don't care. Fight me.) + +So the skinny is I wound up writing my own static website generator in Prolog. Well, the main components are in Prolog. I also wrote a bash script to make use of a bunch of common \*nix utilities (find, sed, grep, etc.) 
and to pipe output to some third-party programs where I needed them (HTML Tidy and it's still TBD, but possibly Pandoc in the future). Weirdest bit was that I just couldn't find anything decent to generate RSS feeds. I considered dropping the RSS all together, but I've spent enough time haranguing people for not supporting interoperable standards that I didn't want to be a hypocrite. I wound up writing my own RSS generator too, also in Prolog. + +It's all reeeeeally alpha and is pretty closely tailored to my specific needs, but it works, and IMHO it works better than my old site generator which injected a bunch of nonsense into my HTML. + +## Dependencies + +* Bash. Used to run the script that automates everything else. +* A Prolog interpreter. Tested with [SWI-Prolog](https://www.swi-prolog.org/), but the syntax should be vanilla ISO Prolog and should work with any implementation. +* [HTML Tidy](http://www.html-tidy.org/). Used to format the HTML output nicely — not something I relished doing in Prolog. +* [Smartypants](https://github.com/leohemsted/smartypants.py) located at ~/.local/bin/smartypants. Used to smarten the punctuation in the HTML output. + +## Assumptions + +The website folder used in the second argument is expected to contain three things: + +* a "source" folder containing the website's source; +* an "output" folder containing the website's static output; +* a "site.pl" file containing site-specific definitions. + +One or the other of the "source" and "output" folders must be populated, but not necessarily both. + +site.pl contains DCG definitions of this site's specifics, such as title, author, etc. An example site.pl file might look like this: + + site_title --> "My website name". + + site_subtitle --> "My website description/subtitle". + + site_url --> "https://www.example.com". + + email --> "webmaster@example.com". + + name --> "Harold Gruntfuttock". 
+ +## Use + +Generate a static website from Markdown sources: + + ./tastic.sh generate /home/user/website + +Generate source files from a static website: + + ./tastic.sh ungenerate /home/user/website + +## Still to do + +The source Markdown files are currently assumed to be plain HTML with a Markdown header containing metadata. I'm going to need something to convert proper Markdown to HTML, so I'll probably add Pandoc as a dependency to tastic.sh. I expect this will also replace Smartypants for doing smart punctuation.+ \ No newline at end of file diff --git a/rss.pl b/rss.pl @@ -1,50 +1,7 @@ -:- include('helpers.pl'). -:- include('markdown.pl'). - -generate_rss(BuildDate, Filenames):- - files_to_articles(Filenames, Articles), - sort(Articles, SortedArticles), - take_last(5, SortedArticles, TakenArticles), - rss(BuildDate, TakenArticles, RSSCodes, []), - atom_codes(RSS, RSSCodes), - write(RSS), - halt. - -files_to_articles([], []). - -files_to_articles([Filename|Filenames], [article(Date, Title, Link, Description)|Articles]):- - open(Filename, read, Stream), - read_file(Stream, Markdown), - close(Stream), - % Grab the link. - get_link(Filename, Link), - % Extract the title, entry, etc. from the Markdown. - markdown(Entry, Title, _, Date, Markdown, []), - % XML escape the description. - replace("&", "&amp;", Entry, EntryAmp), - replace("<", "&lt;", EntryAmp, EntryLT), - replace(">", "&gt;", EntryLT, Description), - files_to_articles(Filenames, Articles). - -get_link(Filename, Link):- - atom_codes(Filename, FilenameCodes), - % Just assert that this is an index file before we go further. - % Backtracking after this point will take us down a rabbit hole. - append(_, "index.md", FilenameCodes), - site_url(URL, []), - append(_, "/source", StartPath), - append(StartPath, Path, FilenameCodes), - append(PathWithoutFile, "index.md", Path), - append(URL, PathWithoutFile, Link). 
- -get_link(Filename, Link):- - atom_codes(Filename, FilenameCodes), - site_url(URL, []), - append(_, "/source", StartPath), - append(StartPath, Path, FilenameCodes), - append(PathWithoutExtension, ".md", Path), - append(PathWithoutExtension, "/", PathWithSlash), - append(URL, PathWithSlash, Link). +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% File: rss.pl +% Description: DCG definition of an RSS file. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% rss(BuildDate, Articles) --> rss_open, @@ -115,7 +72,7 @@ webmaster --> last_build_date(BuildDate) --> "<lastBuildDate>", - BuildDate, + anything(BuildDate), "</lastBuildDate>". items([]) --> []. @@ -146,7 +103,7 @@ author --> pubdate(Date) --> "<pubDate>", - Date, + anything(Date), "</pubDate>". item_close --> "</item>". diff --git a/tastic.sh b/tastic.sh @@ -8,17 +8,17 @@ SITE_PATH=$2 if [ "$1" == "ungenerate" ] then # Create the directory structure. - rm -rf $SITE_PATH/$SOURCE_DIR/* - find $SITE_PATH/$OUTPUT_DIR -type d | + rm -rf "$SITE_PATH"/"$SOURCE_DIR"/* + find "$SITE_PATH"/"$OUTPUT_DIR" -type d | sed "s|^$SITE_PATH/$OUTPUT_DIR|$SITE_PATH/$SOURCE_DIR|" | xargs -0 -d '\n' mkdir -p -- # Parse and create all the markdown files. - find $SITE_PATH/$OUTPUT_DIR -type f -name "*.html" -print0 | + find "$SITE_PATH"/"$OUTPUT_DIR" -type f -name "*.html" -print0 | while IFS= read -r -d '' file; do NEW_PATH=`echo "$file" | sed "s|^$SITE_PATH/$OUTPUT_DIR|$SITE_PATH/$SOURCE_DIR|" | sed 's|.html$|.md|'` cat "$file" | - swipl --traditional -q -l entries.pl -g "consult('$SITE_PATH/site.pl'), parse_entry." | + swipl --traditional -q -l parse_entry.pl -g "consult('$SITE_PATH/site.pl'), parse_entry." | # Unsmarten the punctuation. sed "s|&nbsp;| |g" | sed "s|&#8216;|'|g" | @@ -29,7 +29,7 @@ then done # Copy anything else directly. 
- find $SITE_PATH/$OUTPUT_DIR -type f -not -name "*.html" -print0 | + find "$SITE_PATH"/"$OUTPUT_DIR" -type f -not -name "*.html" -print0 | while IFS= read -r -d '' file; do NEW_PATH=`echo "$file" | sed "s|^$SITE_PATH/$OUTPUT_DIR|$SITE_PATH/$SOURCE_DIR|"` cp "$file" "$NEW_PATH" @@ -37,37 +37,37 @@ then elif [ "$1" == "generate" ] then # Create the directory structure. - rm -rf $SITE_PATH/$OUTPUT_DIR/* - find $SITE_PATH/$SOURCE_DIR -type d | + rm -rf "$SITE_PATH"/"$OUTPUT_DIR"/* + find "$SITE_PATH"/"$SOURCE_DIR" -type d | sed "s|^$SITE_PATH/$SOURCE_DIR|$SITE_PATH/$OUTPUT_DIR|" | xargs -0 -d '\n' mkdir -p -- # Parse and create all the HTML files. - find $SITE_PATH/$SOURCE_DIR -type f -name "*.md" -print0 | + find "$SITE_PATH"/"$SOURCE_DIR" -type f -name "*.md" -print0 | while IFS= read -r -d '' file; do echo $file NEW_PATH=`echo "$file" | sed "s|^$SITE_PATH/$SOURCE_DIR|$SITE_PATH/$OUTPUT_DIR|" | sed 's|.md$|.html|'` cat "$file" | - swipl --traditional -q -l entries.pl -g "consult('$SITE_PATH/site.pl'), generate_entry." | + swipl --traditional -q -l parse_entry.pl -g "consult('$SITE_PATH/site.pl'), generate_entry." | tidy -quiet --indent auto --indent-with-tabs yes --wrap 0 -asxml --tidy-mark no | ~/.local/bin/smartypants \ > "$NEW_PATH" done # Copy anything else directly. - find $SITE_PATH/$SOURCE_DIR -type f -not -name "*.md" -print0 | + find "$SITE_PATH"/"$SOURCE_DIR" -type f -not -name "*.md" -print0 | while IFS= read -r -d '' file; do NEW_PATH=`echo "$file" | sed "s|^$SITE_PATH/$SOURCE_DIR|$SITE_PATH/$OUTPUT_DIR|"` cp "$file" "$NEW_PATH" done # Generate the RSS feed. 
- mkdir -p $SITE_PATH/$OUTPUT_DIR/feeds - ARTICLES=`grep -Rl --include=\*.md "^Date: " $SITE_PATH/$SOURCE_DIR | paste -sd ',' - | sed "s|,|','|g"` + mkdir -p "$SITE_PATH"/"$OUTPUT_DIR"/feeds + ARTICLES=`grep -Rl --include=\*.md "^Date: " "$SITE_PATH"/"$SOURCE_DIR" | paste -sd ',' - | sed "s|,|','|g"` BUILD_DATE=`date +"%Y-%m-%d %T"` - swipl --traditional -q -l rss.pl -g "consult('$SITE_PATH/site.pl'), generate_rss(\"$BUILD_DATE\", ['$ARTICLES'])." | + swipl --traditional -q -l generate_rss.pl -g "consult('$SITE_PATH/site.pl'), generate_rss(\"$BUILD_DATE\", ['$ARTICLES'])." | tidy -quiet --indent auto --indent-with-tabs yes --wrap 0 -xml --tidy-mark no \ - > $SITE_PATH/$OUTPUT_DIR/feeds/rss.xml + > "$SITE_PATH"/"$OUTPUT_DIR"/feeds/rss.xml else echo "Invalid argument." exit 1