Blame
Date:
Tue Jun 30 16:51:32 2020 UTC
Message:
sitemap.xml function
001
2020-06-28
dev
#!/usr/bin/env python3
002
2020-06-28
dev
003
2020-06-28
dev
# Copyright (c) 2020 Daniel Jakots
004
2020-06-28
dev
# 2020 Frédéric Galusik
005
2020-06-28
dev
#
006
2020-06-28
dev
# Licensed under the MIT license. See the LICENSE file.
007
2020-06-28
dev
008
2020-06-28
dev
import datetime
009
2020-06-28
dev
import glob
010
2020-06-30
dev
import os
011
2020-06-28
dev
import sys
012
2020-06-28
dev
013
2020-06-28
dev
import jinja2
014
2020-06-28
dev
import markdown
015
2020-06-28
dev
016
2020-06-28
dev
import feedgenerator
017
2020-06-28
dev
018
2020-06-30
dev
# Glob pattern selecting the article source files; main() hands it to
# parse_articles(), and parse_article() uses its length to strip the
# directory prefix from each article path.
content_path = "content/*"

# Public base URL substituted for the output directory in sitemap <loc> entries.
siteroot = "https://galusik.fr/blog"

# Settings passed to the templates and used to describe the Atom feed.
# "url" is empty, so links built from it start with "/".
# "feed_path" is joined onto output_dir when the feed is written.
site = {
    "author": "Frédéric Galusik",
    "name": "Frédéric Galusik",
    "url": "",
    "feed_path": "~/feeds/atom.xml",
}

# Directory that receives every generated file.
output_dir = "output"

# Filled by gen_sitemap() with the paths of the HTML files it finds.
files_in_dir = []

# Location of the generated sitemap.
sitemap = f"{output_dir}/sitemap.xml"

# ISO date (YYYY-MM-DD) of the current run, written as <lastmod> in the sitemap.
today = datetime.date.today().isoformat()
029
2020-06-28
dev
030
2020-06-28
dev
031
2020-06-28
dev
def md2html(md):
    """Render Markdown text to HTML.

    Args:
        md: Markdown source as a string.

    Returns:
        The HTML produced by ``markdown.markdown`` with the ``codehilite``,
        ``fenced_code`` and ``attr_list`` extensions enabled.
    """
    enabled_extensions = ["codehilite", "fenced_code", "attr_list"]
    return markdown.markdown(md, extensions=enabled_extensions)
034
2020-06-28
dev
035
2020-06-28
dev
036
2020-06-28
dev
def parse_article(article_path):
    """Read one article file and return its metadata and Markdown body.

    The first three lines of the file are scanned for ``Title: ``,
    ``Date: `` and ``Category: `` headers (in any order); everything after
    those three lines is kept as the Markdown body.

    Args:
        article_path: Path of the article file. It is expected to start with
            the directory part of the module-level ``content_path`` pattern,
            because that many leading characters are sliced off to build the
            ``"file"`` entry.

    Returns:
        A dict with the keys ``"file"`` (path without the content directory
        prefix and without a trailing ``.md``), ``"title"``, ``"date"``,
        ``"category"`` and ``"markdown"``.

    Exits:
        Prints a message and calls ``sys.exit(1)`` when any of the three
        metadata headers is missing.
    """
    header_keys = {"Title: ": "title", "Date: ": "date", "Category: ": "category"}

    # Strip only a trailing ".md"; replacing every ".md" occurrence would
    # also mangle names that merely contain that substring.
    stem = article_path[:-3] if article_path.endswith(".md") else article_path
    # content_path ends with "*", so len(content_path) - 1 is the length of
    # the directory prefix (e.g. "content/").
    article = {"file": stem[len(content_path) - 1:]}

    with open(article_path, "r", encoding="utf-8") as f:
        # readline() returns "" at end of file, so a file with fewer than
        # three lines reaches the metadata check below instead of raising
        # StopIteration.
        for _ in range(3):
            line = f.readline()
            for prefix, key in header_keys.items():
                if line.startswith(prefix):
                    article[key] = line[len(prefix):].strip()
                    break
        article["markdown"] = f.read()

    # "file" and "markdown" are always set, so counting keys cannot detect a
    # single missing header; check the required keys by name instead.
    missing = [key for key in header_keys.values() if key not in article]
    if missing:
        print(f"There's a problem with metadata for {article_path}")
        sys.exit(1)
    return article
054
2020-06-28
dev
055
2020-06-28
dev
056
2020-06-28
dev
def parse_articles(content_path):
    """Parse every file matching a glob pattern, newest first.

    Args:
        content_path: Glob pattern selecting the article files.

    Returns:
        A list of the dicts returned by ``parse_article``, ordered by their
        ``"date"`` string in descending order.
    """
    parsed = [parse_article(path) for path in glob.glob(content_path)]
    return sorted(parsed, key=lambda entry: entry["date"], reverse=True)
063
2020-06-28
dev
064
2020-06-28
dev
065
2020-06-28
dev
def _write_page(path, html):
    """Write ``html`` to ``path`` as UTF-8, creating parent directories."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(html)


def generate_website(content):
    """Render the index page and one HTML page per article.

    Templates are loaded from the ``templates`` directory. The index is
    written to ``<output_dir>/index.html``; each article is written to
    ``<output_dir>/<file>.html`` when its category is ``othercontent`` and to
    ``<output_dir>/~/<file>.html`` otherwise.

    Args:
        content: List of article dicts. Each must provide ``"category"`` and
            ``"file"``; the whole dict is handed to the templates.
    """
    jinja2_env = jinja2.Environment(
        loader=jinja2.FileSystemLoader("templates"), trim_blocks=True
    )

    index_template = jinja2_env.get_template("index.html.j2")
    _write_page(
        f"{output_dir}/index.html",
        index_template.render(articles=content, site=site),
    )

    article_template = jinja2_env.get_template("article.html.j2")
    for article in content:
        # Regular articles live under the "~" directory; "othercontent"
        # pages sit directly in the output root.
        prefix = "" if article["category"] == "othercontent" else "~/"
        # The target directory may not exist yet on a fresh checkout, and the
        # rendered pages contain non-ASCII text, hence the helper that
        # creates directories and pins the encoding.
        _write_page(
            f"{output_dir}/{prefix}{article['file']}.html",
            article_template.render(article=article, site=site),
        )
082
2020-06-28
dev
083
2020-06-28
dev
084
2020-06-28
dev
def create_feed():
    """Build an empty Atom feed described by the ``site`` settings.

    Returns:
        A ``feedgenerator.Atom1Feed`` whose title is the site name and whose
        link, feed URL and description are derived from ``site["url"]`` and
        ``site["feed_path"]``.
    """
    base_url = site["url"]
    return feedgenerator.Atom1Feed(
        title=site["name"],
        link=f"{base_url}/",
        feed_url=f"{base_url}/{site['feed_path']}",
        description=f"Feed for {base_url}",
    )
091
2020-06-28
dev
092
2020-06-28
dev
093
2020-06-30
dev
def gen_sitemap():
    """Write ``sitemap.xml`` listing every generated HTML page.

    Walks ``output_dir`` for files whose name ends in ``.html``, stores their
    paths in the module-level ``files_in_dir`` list, and writes one ``<url>``
    entry per page to the ``sitemap`` file. Each ``<loc>`` is ``siteroot``
    followed by the page's path relative to ``output_dir``; each
    ``<lastmod>`` is ``today``. Any existing sitemap file is overwritten.
    """
    from xml.sax.saxutils import escape

    # Sorted so the sitemap is identical from run to run; os.walk gives no
    # ordering guarantee. endswith() avoids picking up names that merely
    # contain ".html" (e.g. "page.html.bak").
    pages = sorted(
        os.path.join(root, name)
        for root, _dirs, names in os.walk(output_dir)
        for name in names
        if name.endswith(".html")
    )
    # Replace the shared list's contents instead of appending, so calling
    # this function twice does not duplicate every entry.
    files_in_dir[:] = pages

    lines = [
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
        "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n",
    ]
    for page in pages:
        # Swap only the leading output directory for the site root; a plain
        # str.replace would also rewrite "output" occurring later in the
        # path. Path separators are normalized to "/" for the URL.
        relative = os.path.relpath(page, output_dir).replace(os.sep, "/")
        url = escape(siteroot + "/" + relative)  # "&", "<", ">" must be entities
        lines.append(" <url>\n")
        lines.append(" <loc>" + url + "</loc>\n")
        lines.append(" <lastmod>" + today + "</lastmod>\n")
        lines.append(" </url>\n")
    lines.append("</urlset>\n")

    # "w" truncates any previous sitemap; the encoding matches the XML
    # declaration written above.
    with open(sitemap, "w", encoding="utf-8") as sm:
        sm.writelines(lines)
111
2020-06-30
dev
112
2020-06-30
dev
113
2020-06-28
dev
def main():
    """Build the whole site: Atom feed, HTML pages, then the sitemap.

    Every article found via ``content_path`` has its Markdown converted to
    HTML (stored under ``"html"``, replacing ``"markdown"``). Articles whose
    category is not ``othercontent`` are added to the Atom feed, which is
    written to ``<output_dir>/<site["feed_path"]>``. Finally the HTML pages
    and the sitemap are generated.

    Raises:
        ValueError: If an article date is not of the form ``YYYY-MM-DD``
            (three integer parts separated by ``-``).
    """
    content = parse_articles(content_path)
    feed = create_feed()
    for article in content:
        article["html"] = md2html(article.pop("markdown"))
        if article["category"] == "othercontent":
            # Standalone pages are rendered but not announced in the feed.
            continue

        # Unpacking into exactly three names rejects malformed dates up
        # front instead of passing stray parts on to datetime().
        year, month, day = (int(part) for part in article["date"].split("-"))
        # Articles carry only a date; the feed entry is stamped 10:00:00.
        published = datetime.datetime(year, month, day, 10, 0, 0)

        feed.add_item(
            title=article["title"],
            link=f"{site['url']}/~/{article['file']}.html",
            author_name=site["author"],
            pubdate=published,
            description=article["html"],
        )

    feed_file = f"{output_dir}/{site['feed_path']}"
    feed_dir = os.path.dirname(feed_file)
    if feed_dir:
        # The feed lives in a nested directory that may not exist yet.
        os.makedirs(feed_dir, exist_ok=True)
    # The feed is serialized as "utf-8", so the file must use the same
    # encoding rather than the platform default.
    with open(feed_file, "w", encoding="utf-8") as f:
        feed.write(f, "utf-8")

    generate_website(content)
    gen_sitemap()
134
2020-06-28
dev
135
2020-06-28
dev
136
2020-06-28
dev
# Build the site only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Frédéric Galusik