You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

197 lines
5.0KB

  1. #!/bin/bash
  2. ##
  3. # Generate html page with blog article excerpts from ./posts.txt. Post file names should
  4. # be added to ./posts.txt in the exact order that they are supposed to appear on the blog
  5. # page.
  # Check if required executables can be found. The list covers every external
  # program this script invokes, including the text tools (sed, tr, head, tail)
  # and date, so that a missing tool is reported before any output is written.
  required_executables=(
    readlink dirname html2text mv cat cksum base64 pup
    sed tr head tail date
  )
  if ! type "${required_executables[@]}"; then
    echo 'One or more required executables are not present. Generation cancelled' >&2
    echo 'Note: You can install pup with "go get github.com/ericchiang/pup"' >&2
    exit 1
  fi

  # Determine script directory (requires GNU readlink)
  here="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"

  # pushd prints the directory stack, which completes the message started here.
  printf 'Changing directory: '
  pushd "$here" || exit $?

  # The list of posts lives next to the script; without it there is nothing to do.
  posts_file="$here/posts.txt"
  if ! [[ -f "$posts_file" ]]; then
    printf 'Posts file "%s" not found. Generation cancelled.\n' "$posts_file" >&2
    exit 1
  fi
  # Escape the HTML-special characters & < > " and ' in the text read from
  # stdin and write the result to stdout. The ampersand is handled first so
  # that the entities produced by the later substitutions are not escaped again.
  escape-html() {
    sed \
      -e 's/&/\&amp;/g' \
      -e 's/</\&lt;/g' \
      -e 's/>/\&gt;/g' \
      -e 's/"/\&quot;/g' \
      -e "s/'/\\&#39;/g"
  }
  # Convert HTML to plain text by running html2text with the fixed options
  # "-nobs -style compact". All arguments (typically the input file name) are
  # passed through to html2text after those options.
  html-to-text() {
    local -a fixed_options=(-nobs -style compact)
    html2text "${fixed_options[@]}" "$@"
  }
  # Print the opening part of the blog overview page to stdout: the document
  # head (title, stylesheet link, charset and inline heading-link styles), the
  # navigation bar and the page heading. The output ends with an empty line.
  #
  # The inline <style> element is emitted inside <head>, and the navigation bar
  # uses "flex-direction: row" ("horizontal" is not a valid value for that
  # property).
  print-blog-html-top() {
    printf '%s\n' \
      '<html>' \
      '<head>' \
      '<title>Blog</title>' \
      '<link rel="stylesheet" type="text/css" href="style.css">' \
      '<meta charset="UTF-8">' \
      '<style type="text/css">' \
      'h2 a {' \
      'color: #5b4636;' \
      'text-decoration: none;' \
      '}' \
      'h2 a:visited {' \
      'color: #5b4636;' \
      'text-decoration: none;' \
      '}' \
      '</style>' \
      '</head>' \
      '<body>' \
      '<div style="display: flex; flex-direction: row;">' \
      '<a href="index.html">Home</a>' \
      '<span style="margin-left: 1em; margin-right: 1em;">|</span>' \
      '<a href="feed.xml">RSS Feed</a>' \
      '</div>' \
      '<h1>Blog</h1>' \
      ''
  }
  # Print the closing tags of the blog overview page (body and html) to stdout,
  # one tag per line.
  print-blog-html-bottom() {
    printf '%s\n' ' </body>' '</html>'
  }
  # Print a date and time in the format produced by date's --rfc-email option.
  # Any arguments are handed to date ahead of that option, so callers can pick
  # the moment to print (e.g. --date="..."); without arguments the current time
  # is used. LC_ALL=C is set for the date invocation only.
  rfc-822-date-time() {
    local -a date_arguments=("$@" --rfc-email)
    LC_ALL=C date "${date_arguments[@]}"
  }
  # Print the opening part of the RSS feed to stdout: the XML declaration, the
  # <rss> and <channel> opening tags and the channel metadata.
  #
  # Note: pubDate and lastBuildDate are both set to the current time. The time
  # stamp is obtained once and used for both elements, so the two values can
  # never differ from each other.
  #
  # Returns the exit status of rfc-822-date-time when it fails, without
  # printing anything.
  print-blog-rss-top() {
    local build_date
    build_date="$(rfc-822-date-time)" || return

    printf '%s\n' \
      '<?xml version="1.0"?>' \
      '<rss version="2.0">' \
      '<channel>' \
      '<title>Hugot Blog</title>' \
      '<link>https://hugot.nl/blog.html</link>' \
      "<description>Hugo's personal blog</description>" \
      '<language>en-us</language>' \
      "<pubDate>$build_date</pubDate>" \
      "<lastBuildDate>$build_date</lastBuildDate>" \
      '<docs>http://blogs.law.harvard.edu/tech/rss</docs>' \
      "<generator>Hugo's Custom Bash Script</generator>" \
      '<managingEditor>social@hugot.nl (Hugot)</managingEditor>' \
      '<webMaster>infra@hugot.nl (Hugot Infra)</webMaster>'
  }
  # Print the closing tags of the RSS feed (channel and rss) to stdout, one tag
  # per line.
  print-blog-rss-bottom() {
    printf '%s\n' '</channel>' '</rss>'
  }
  # Print an opening tag, without a trailing newline.
  #
  # Arguments:
  #   $1   - tag contents (element name plus optional attributes). It is used
  #          as a printf format string, so it may contain conversion
  #          specifications such as %s.
  #   $2.. - values for the conversion specifications in $1.
  #
  # The format string is kept in a local variable so that calling this
  # function does not overwrite a global "format_string".
  el() {
    local format_string="$1"
    shift
    # shellcheck disable=SC2059 # the caller-supplied format is intentional
    printf "<$format_string>" "$@"
  }
  # Print the closing tag for the element named by $1, followed by a newline.
  el-close() {
    printf '</%s>\n' "$1"
  }
  # Print an element with content, without a trailing newline.
  #
  # Arguments:
  #   $1   - element name, used for both the opening and the closing tag.
  #   $2.. - content; all remaining arguments are printed back to back, with
  #          nothing in between. The content is printed as given, so the caller
  #          is responsible for any escaping.
  #
  # The element name is kept in a local variable so that calling this function
  # does not overwrite a global "element_name".
  el-enclose() {
    local element_name="$1"
    shift
    printf '<%s>' "$element_name"
    printf '%s' "$@"
    printf '</%s>' "$element_name"
  }
  site_url="https://hugot.nl"
  blog_html="$here/blog.html"
  new_html="$blog_html.new"
  blog_rss="$here/feed.xml"
  new_rss="$blog_rss.new"

  # Both documents are built in "*.new" files and only moved over the published
  # files once every post has been processed.
  print-blog-html-top > "$new_html" || exit $?
  print-blog-rss-top > "$new_rss" || exit $?

  # The "|| [[ -n ... ]]" part makes sure that a last line without a trailing
  # newline is processed as well.
  while read -r post_html || [[ -n "$post_html" ]]; do
    # Skip empty lines instead of generating an entry for a nameless post.
    [[ -n "$post_html" ]] || continue

    if ! [[ -f "$post_html" ]]; then
      printf 'Post file "%s" not found. Generation cancelled.\n' "$post_html" >&2
      exit 1
    fi

    # Convert the post's html to text to make it easier to use the blog's text
    text="$(html-to-text "$post_html" | escape-html)" || exit $?

    # The title is taken from the 3rd line of the converted text; the lines
    # before it are expected to hold the link to the homepage. This is a bit
    # inflexible but it will do for now.
    title="$(tail -n +3 <<<"$text" | head -n 1 | tr -d '*')" || exit $?

    # Use the first 5 lines after the title as post excerpt.
    excerpt="$(tail -n +4 <<<"$text" | head -n 5)" || exit $?

    # Escape just the article element for use in the RSS feed article description.
    # This way the entire article can be read from an RSS reader. The first and
    # the last line of pup's output are dropped.
    article_html="$(pup article < "$post_html" | head -n -1 | tail -n +2 | escape-html)" || exit $?

    # Escape the post html file name to safely use it in the generated html.
    href="$(escape-html <<<"$post_html")" || exit $?

    post_dir="$(dirname "$post_html")" || exit $?
    pubdate_file="$post_dir/publish_date.txt"

    # Determine a publishing date for the post. A post without a stored date
    # gets the current time, which is saved for later runs. The date is written
    # with LC_ALL=C because it is parsed again below by a date invocation that
    # runs under LC_ALL=C; output in another locale may not be parseable.
    if [[ -f "$pubdate_file" ]]; then
      read -r pubdate < "$pubdate_file"
    else
      pubdate="$(LC_ALL=C date)" || exit $?
      printf '%s\n' "$pubdate" > "$pubdate_file" || exit $?
    fi

    # Convert publishing date to be conform RFC 822
    pubdate="$(rfc-822-date-time --date="$pubdate")" || exit $?

    # Entry on the blog overview page: title link, date, excerpt and a
    # horizontal rule.
    {
      el div
      printf '<h2 style="margin-bottom: 0.1em;"><a href="%s">%s</a></h2>' "$href" "$title"
      printf '<i style="font-size: 0.8em;">%s</i>' "$pubdate"
      el 'p style="margin-top: 0.5em;"'
      printf '%s ... <a href="%s">Continue reading</a>' "$excerpt" "$href"
      el-close p
      el-close div
      el hr
    } >> "$new_html"

    # Item in the RSS feed. The guid consists of the title followed by the
    # base64 encoded cksum output for the post's text.
    {
      el item
      el-enclose title "$title"
      el-enclose link "$site_url/$href"
      el-enclose description "$article_html"
      el-enclose pubDate "$pubdate"
      printf '<guid isPermaLink="false">%s%s</guid>\n' "$title" "$(cksum <<<"$text" | base64)"
      el-close item
    } >> "$new_rss"
  done < "$posts_file"

  print-blog-html-bottom >> "$new_html" || exit $?
  print-blog-rss-bottom >> "$new_rss" || exit $?

  # Publish the freshly generated documents.
  mv -v "$new_html" "$blog_html" || exit $?
  mv -v "$new_rss" "$blog_rss" || exit $?

  echo 'SUCCESS!'