rivet/rivet.sh
Alessandro Mauri 5ac9e2ae5f fixed date
Previously the date was taken from the destination files; since those are
generated in the right order, lists sorted by last-modified date were in the
right order but showed the wrong dates.

Now rivet creates another list during the creation of the main object list,
just for storing the dates of the source files. There is a 1:1 correspondence
between the files in the objlist and the dates in the objdate list, so they
can be retrieved with the same counter.

Another change is that shortened dates are now displayed in the index link
list.
2021-06-11 16:01:05 +02:00
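
The lookup pattern, as a minimal sketch (the temp-file names match the script
below; linklist.tmp holds one URL per objlist.tmp row):

	count=0
	while IFS="" read -r line; do
	count=$((count + 1))
	file="$(awk "NR==$count" objlist.tmp)"  # Nth file path
	fdate="$(awk "NR==$count" objdate.tmp)" # its matching source date
	done < linklist.tmp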


#!/bin/sh -e
# Copyright (c) 2021 Alessandro Mauri
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
set -e
unset SKIP_FOOTER
unset SKIP_HEADER
unset SKIP_LIST
unset VERBOSE
unset PRINT_HELP
unset SKIP_SITEMAP
unset SKIP_FEED
usage() {
printf "Usage: rivet [-hvelfsu] [-p string] [-o destdir] srcdir domain\n"
printf "\t-h: prints this message\n"
printf "\t-o [destdir]: specifies the output direcotory to be [destdir]\n"
printf "\t-p [string]: Rename the \"Pages\" section to [string]\n"
printf "\t-n [string]: Set the title of the atom feed\n"
printf "\t-d [string]: Set the description of the atom feed\n"
printf "\t-m [number]: Set the max number of elements in the atom feed,
0 to include all files\n"
printf "\t-v: Makes the script verbose\n"
printf "\t-e: Do not prepend _header.html to .html files\n"
printf "\t-f: Do not prepend _footer.html to .html files\n"
printf "\t-l: Do not generate \"Pages\" section in index.html\n"
printf "\t-s: Do not generate sitemap.xml\n"
printf "\t-r: Do not generate an atom feed\n"
printf "\t-u: Makes all references to the url 'http' instead of 'https'\n"
exit 2
}
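# Example invocation (hypothetical paths and domain), building the site for
# example.com from ./src into ./public with the link section named "Articles":
#   rivet -o public -p "Articles" src example.com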
# Convert a markdown file to HTML in place using lowdown(1); note that the
# .md source is deleted after a successful conversion
convert() {
infile="$1"
extension="${infile##*.}"
if [ "$extension" = 'md' ]; then
tmpfile="tmpconvfile.tmp"
outfile="${infile%md}html"
cp "$infile" "$tmpfile"
# -s emits a standalone HTML document rather than a fragment
lowdown -s -Thtml -o "$outfile" "$tmpfile"
rm -f "$tmpfile" "$infile"
fi
# TODO: convert links to .md to .html
}
# Check dependencies
if ! command -v lowdown > /dev/null; then
echo "lowdown is not installed"
exit 1
fi
destdir='dst'
prefix='https'
linksec='Pages'
blog_title='Atom feed'
blog_desc=''
blog_nmax='0'
while getopts 'o:vhelfsrup:n:d:m:' c
do
case "$c" in
o) destdir=${OPTARG%%\/} ;;
v) VERBOSE=true ;;
h) PRINT_HELP=true ;;
e) SKIP_HEADER=true ;;
l) SKIP_LIST=true ;;
f) SKIP_FOOTER=true ;;
s) SKIP_SITEMAP=true ;;
r) SKIP_FEED=true ;;
u) prefix='http' ;;
p) linksec="$OPTARG" ;;
n) blog_title="$OPTARG" ;;
d) blog_desc="$OPTARG" ;;
m) blog_nmax="$OPTARG" ;;
*) ;;
esac
done
shift $((OPTIND - 1))
if ! [ "$1" ] || ! [ "$2" ]; then
echo "Not enough arguments"
usage
fi
src="$1"
srcdir=${src%%\/}
unset src
headerfile=$srcdir/_header.html
footerfile=$srcdir/_footer.html
objlist=objlist.tmp
objdate=objdate.tmp
# Check if index.md is present
if ! [ -e "$srcdir"/index.md ]; then
echo "Missing index.md in $srcdir"
exit 1
fi
# Check header and footer files
if ! [ -e "$headerfile" ]; then
echo "Missing _header.html in $srcdir"
exit 1
fi
if ! [ -e "$footerfile" ]; then
echo "Missing _footer.html in $srcdir"
exit 1
fi
# Remove junk from {header,footer} files
sed -i 's/<header.*>//' "$headerfile"
sed -i 's/<\/header>//' "$headerfile"
sed -i 's/<footer.*>//' "$footerfile"
sed -i 's/<\/footer>//' "$footerfile"
# Remove any junk from the domain, e.g. [https://]domain.com[/]
domain="$(echo "$2" | sed -e 's/^https*:\/\///' -e 's/\/$//')"
# Save the real url
url="$prefix"'://'"$domain"
if [ "$PRINT_HELP" ]; then
usage
fi
if [ "$VERBOSE" ]; then
set -x
fi
if ! [ -d "$srcdir" ]; then
echo "Error: missing source direcotry"
usage
fi
rm -rf "$destdir"
mkdir -p "$destdir"
cp -r "$srcdir"/* "$destdir"
rm -f "$destdir"/_header.html "$destdir"/_footer.html
# Generate a file list ordered by last modification time (newest first),
# skipping files whose names start with '_'
find "$srcdir" -type f -regex '.*\/[^_].+\..*' -exec ls -1t {} + |
awk '/.*\.(md|html)$/' > "$objlist"
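# Record each source file's modification date; line N here matches line N
# in $objlist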
rm -f "$objdate"
while IFS="" read -r file; do
stat -c '%y' "$file" >> "$objdate"
done < "$objlist"
sed -i -e "s,^\/*[^\/]*\/,$destdir/," "$objlist"
# Convert markdown files
while IFS="" read -r file; do
convert "$file"
done < "$objlist"
# Convert the file list to a list that contains the path of all the html files
sed -i -e 's/\.md$/\.html/' "$objlist"
# Create a list that contains the links to all html files
linklist=linklist.tmp
cp -f "$objlist" "$linklist"
sed -i -e "s/^$destdir//" -e "s/^/$prefix:\/\/$domain/" "$linklist"
# Inject the header file into each page right after its <head> tag
if ! [ "$SKIP_HEADER" ]; then
find "$destdir" -name "*.html" |
while IFS="" read -r file; do
sed -i "/<head>/r $headerfile" "$file"
done
fi
# Wrap the footer in <footer> tags and inject it before each page's
# closing </body> tag
if ! [ "$SKIP_FOOTER" ]; then
tmpfoot="tmpfootfile.tmp"
cp "$footerfile" "$tmpfoot"
sed -i '1s/^/<footer>/' "$tmpfoot"
echo '</footer>' >> "$tmpfoot"
find "$destdir" -name "*.html" |
while IFS="" read -r file; do
sed -i "/<\/body>/r $tmpfoot" "$file"
done
rm -f "$tmpfoot"
fi
# Prepare index file list
if ! [ "$SKIP_LIST" ]; then
tmpfile="linkindex.tmp"
rm -f "$tmpfile"
cat << EOF >> "$tmpfile"
<div id="map">
<h2 id="Pages">$linksec</h2>
EOF
count='0'
while IFS="" read -r line; do
count=$((count + 1))
if echo "$line" | grep -q 'index\.html'; then
continue
fi
tfile="$(awk "NR==$count" "$objlist")"
title="$(sed -E -n 's/.*<title>\s*(.+)\s*<\/title>.*/\1/p' "$tfile")"
if [ -z "$title" ]; then
title="${tfile##*/}"
fi
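# Shortened date: keep only the date field of stat's output, dropping the
# time and timezone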
ldate="$(awk "NR==$count" "$objdate" | sed 's/\s.*$//')"
printf "<p><a href=\"%s\">%s - %s</a></p>\n" "$line" "$ldate" "$title" >> "$tmpfile"
done < "$linklist"
echo '</div>' >> "$tmpfile"
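# Splice the list into index.html: drop a REPLACE marker before </body>,
# read the list in after it, then erase the marker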
sed -i '/<\/body>/i REPLACE' "$destdir"/index.html
sed -i "/^REPLACE/r $tmpfile" "$destdir"/index.html
sed -i 's/^REPLACE//' "$destdir"/index.html
rm -f "$tmpfile"
fi
# Generate sitemap
if ! [ "$SKIP_SITEMAP" ]; then
cat << EOF >> "$destdir"/sitemap.xml
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
</urlset>
EOF
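# Add one <url> entry per page, inserted just before the closing </urlset> tag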
while IFS="" read -r line; do
sed -i "/<\/urlset>/i \
<url><loc>$line<\/loc><\/url>" "$destdir"/sitemap.xml
done < "$linklist"
sed -i 's/^<url>/\t<url>/' "$destdir"/sitemap.xml
fi
# Generate atom feed
if ! [ "$SKIP_FEED" ]; then
feed="$destdir"/atom.xml
cat << EOF >> "$feed"
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>$blog_title</title>
<subtitle>$blog_desc</subtitle>
<link href="$url/atom.xml/" rel="self" />
<link href="$url/" />
<id>$url</id>
<updated>$(date -Is)</updated>
EOF
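# Emit one <entry> per page; rows of $objlist and $objdate are fetched with
# the same counter that walks $linklist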
count='0'
while IFS="" read -r line; do
count=$((count + 1))
if [ "$blog_nmax" -gt '0' ]; then
if [ $count -gt "$blog_nmax" ]; then
break
fi
fi
tfile="$(awk "NR==$count" "$objlist")"
title="$(sed -E -n 's/.*<title>\s*(.+)\s*<\/title>.*/\1/p' "$tfile")"
if [ -z "$title" ]; then
title="${tfile##*/}"
fi
ldate="$(awk "NR==$count" "$objdate")"
{
printf '\t<entry>\n'
printf '\t\t<title>%s</title>\n' "$title"
printf '\t\t<link href="%s" />\n' "$line"
printf '\t\t<id>%s</id>\n' "$line"
printf '\t\t<updated>%s</updated>\n' "$ldate"
printf '\t</entry>\n'
} >> "$feed"
done < "$linklist"
printf '</feed>\n' >> "$feed"
fi
rm -f "$objlist" "$linklist" "$objdate"
exit