squeeze

A static site generator that can put the toothpaste back in the tube.
git clone https://git.stjo.hn/squeeze

squeeze.sh (4214B)


#!/usr/bin/env sh

# Generate a static website.

# Usage: squeeze.sh [-f|--force] site_path
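#
# For example (the "~/blog" path is only illustrative):
#   squeeze.sh ~/blog           regenerate only what has changed since the last run
#   squeeze.sh --force ~/blog   rebuild the entire site from scratch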

force=0

# Loop through all the arguments and set flags/options.
while [ "$#" -gt 0 ] ; do
	case "$1" in
		-f|--force)
			force=1
			shift
			;;
		*)
			site_path="$1"
			shift
			;;
	esac
done

output_path="$site_path/output"
source_path="$site_path/source"
feed_path="$output_path/feeds/rss.xml"
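
# The site_path argument is expected to contain roughly this layout (inferred
# from the variables above and the consult calls further down; nothing beyond
# these names is assumed):
#   site_path/
#     site.pl     Prolog site configuration consulted during generation
#     source/     Markdown sources plus any static assets
#     output/     generated HTML, copied assets, and feeds/rss.xml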

# A space-separated list of all the process IDs we've started.
proc_ids=""
# Max number of processes to run at once.
# There is no way to do `nproc` with only POSIX tools,
# so the best way to make this portable is with fallbacks.
# `nproc` itself isn't even universal on Linux, so the safest
# place to get the number of processors on Linux is /proc/cpuinfo.
max_processes="$(
	grep -c ^processor /proc/cpuinfo 2>/dev/null ||
	sysctl -n hw.ncpu 2>/dev/null ||
	getconf _NPROCESSORS_ONLN 2>/dev/null
)"
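
# Note (a suggestion, not part of the original script): if all three probes
# fail, max_processes ends up empty and the job-throttling test further down
# would error. A conservative guard would be something like:
#   [ -n "$max_processes" ] || max_processes=1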

# Regenerate everything if the force flag has been used or there is
# no RSS file, but otherwise only regenerate Markdown files that have
# changed since the RSS feed was updated.
rsync_exclude=
find_test=
[ "$force" -eq 0 ] &&
	[ -f "$feed_path" ] &&
	# Don't delete already generated HTML files.
	rsync_exclude="--exclude *.html" &&
	# Only find Markdown files newer than the RSS feed.
	find_test="-newer $feed_path" &&
	# Find and delete any HTML files for which a source Markdown
	# no longer exists.
	find "$output_path" -type f -name "*.html" |
		sed "s|$output_path/||" |
		while IFS= read -r file ; do
			[ ! -f "$source_path/${file%.html}.md" ] &&
				echo "deleting orphaned $file" &&
				rm "$output_path/$file"
		done
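
# Note: $rsync_exclude and $find_test are expanded unquoted where they are used
# below, so an empty value vanishes entirely (full rebuild) and a set value
# word-splits into separate arguments (incremental rebuild).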

# Copy everything that's not Markdown.
# This will also create the folder structure for the destination Markdown files.
rsync --archive --delete --verbose \
	--exclude "*.md" --exclude "feeds" $rsync_exclude \
	"$source_path/" "$output_path/"
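
# The "feeds" exclusion matters because the feeds directory exists only in the
# output tree; without it, rsync --delete would presumably remove the generated
# rss.xml on every run.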

# Parse and create all the HTML files.
markdown_files="$(find "$source_path" -type f -name "*.md" $find_test)"
line_count="$(echo "$markdown_files" | wc -l | tr -d -c '[:digit:]')"
index=0
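
# line_count is compared against index inside the loop below so the script
# knows when the last file has been dispatched and it is time to wait for all
# of the background jobs.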

echo "$markdown_files" |
	sed "s|$source_path/||" |
	while IFS= read -r file ; do
		echo "$file"
		index="$(expr "$index" + 1)"

		# Determine if this file has any metadata at the start.
		# Metadata are in the format Key: value, so it's easy to detect.
		head -n 1 "$source_path/$file" | grep -q "^[A-Za-z]*: " &&
			headers=1 ||
			headers=0

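		# When present, the header block looks roughly like this (the key names
		# are only illustrative; the script only checks for a "Word: value" first line):
		#   Title: An example post
		#   Date: 2024-01-01
		#
		#   The body starts after the first blank line.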
		# Get everything after the metadata.
		([ "$headers" -eq 1 ] && sed '1,/^$/d' || cat) < "$source_path/$file" |
			# Convert Markdown to HTML.
			markdown_py --extension footnotes --extension md_in_html --extension smarty --quiet --output_format xhtml |
			# Recombine with the metadata and hand it to Prolog.
			([ "$headers" -eq 1 ] && sed '/^$/q' "$source_path/$file" ; cat) |
			swipl --traditional --quiet -l parse_entry.pl -g "consult('$site_path/site.pl'), generate_entry." |
			# Unwrap block-level elements that have erroneously been wrapped in <p> tags.
			sed 's|<p><details|<details|g' |
			sed 's|</summary></p>|</summary>|g' |
			sed 's|<p></details></p>|</details>|g' |
			sed 's|<p><figure|<figure|g' |
			sed 's|</figure></p>|</figure>|g' |
			# Smarten punctuation.
			smartypants \
			> "$output_path/${file%.md}.html" &
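
		# The two sed calls above are complementary: '1,/^$/d' strips everything
		# up to and including the first blank line (the header block) before the
		# Markdown conversion, while '/^$/q' re-emits only that header block, so
		# Prolog receives the metadata followed by the converted HTML body.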

		if [ "$index" -eq "$line_count" ] ; then
			# Wait until all jobs have completed.
			wait
		else
			# Add the most recent process ID to the list.
			proc_ids="$! $proc_ids"
			# Pause while the number of created processes is greater than
			# or equal to the max processes.
			while [ "$(ps -p "${proc_ids%% }" | tail -n +2 | wc -l | tr -d -c '[:digit:]')" -ge "$max_processes" ] ; do
				true
			done
		fi
	done
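
# The throttle above is a busy-wait: POSIX sh has no `wait -n` (a bash
# extension), so the loop keeps asking ps how many of the launched PIDs are
# still alive and spins until the count drops below max_processes.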

# Generate the RSS feed.
mkdir -p "${feed_path%/*}"
# Grep the date of each article.
find "$output_path" -type f -name "*.html" \
	-exec grep "id=\"article-date\"" {} + |
	# Sort articles by date (skipping the first field).
	sort -k 2 |
	# Get the last (i.e. most recent) posts for the RSS feed.
	tail -n 5 |
	# Reformat to just the file names.
	cut -f 1 -d : |
	# Parse the articles and generate the RSS.
	swipl --traditional --quiet -l generate_rss.pl -g "consult('$site_path/site.pl'), generate_rss(\"$(date '+%a, %d %b %Y %T %Z')\")." \
	> "$feed_path"
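
# The date handed to generate_rss ('%a, %d %b %Y %T %Z') follows the RFC 822
# style that RSS feeds use for dates; note that %a and %b are locale-dependent,
# so running under a non-English locale could produce day/month names a feed
# reader might reject. The id="article-date" marker grepped for above is
# assumed to be emitted by the site's Prolog templates for each article.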