Site Generator

I suppose I should come up with a snappier name.

For the moment this site is hacked together from a small collection of Python scripts and a makefile. The code parses markdown files and renders them using Jinja2 templates. Metadata is stored in a YAML header in each file. Some of the code is shown below.

Design

Folders of the website are also python modules. __init__.py must export a string named folder, which is the folder name, and a function named gen that accepts the main jinja2 template, a precomputed global context and a configuration dictionary.

The gen function must at least add "content" to the context, call the template parse function, and write the output to a file.

Top level functions handle repetitive work and are imported by the subfolder/modules.

main.py is the entry point. It must load the template, set up the config dictionary and the global context, import and run any subfolders/modules, and finally generate the top-level index.html.

Makefile

I like to use makefiles to document the commands to run.

# Build the site by running the generator entry point.
all:
    python main.py

# Preview the generated site: serve the dist folder over HTTP on port 8080.
# SimpleHTTPServer is the Python 2 module name.
test:
    cd dist; \
    python -m SimpleHTTPServer 8080

# Copy everything under dist to the remote host with scp; the bracketed
# parts are placeholders to fill in.
publish:
    scp -r dist/* [user]@[host]:/[destination]

generic_process.py

import os
import shutil
import indexer
import parser

def process_all(folder, template, globalcontext, config, preloaded=None):
    """ Render every markdown page of a folder and copy its static files.

        Steps:
          1. Unless ``preloaded`` is given, list the markdown files in
             ``folder`` and load them through ``indexer``.
          2. Merge ``globalcontext`` into each page context (in place; keys
             from ``globalcontext`` overwrite keys of the page), render it
             with ``template`` and write it to ``config["subdest"]`` under
             the page's ``outfn``.
          3. Copy every path returned by ``indexer.list_static`` into
             ``config["subdest"]``.

        No side bar is added here; pass one in through ``globalcontext`` if
        it is needed.

        ``preloaded`` lets a caller that already loaded the files (for
        instance to build a tag list) hand them over as a list of context
        dictionaries instead of loading them a second time.

        ``config`` must provide the keys "verbose" and "subdest".
    """
    verbose = config["verbose"]

    if verbose:
        print("Processing folder: %s" % folder)

    if preloaded is not None:
        pages = preloaded
    else:
        md_paths = indexer.list_md(folder, verbose=verbose)
        pages = indexer.load_files(md_paths, verbose=verbose).values()

    for page in pages:
        page.update(globalcontext)
        html = parser.render_tpl(template, page)
        parser.write_out(html, config["subdest"], page["outfn"])

    if verbose:
        print("Copying static files...")
    for static_path in indexer.list_static(folder, verbose):
        target = os.path.join(config["subdest"], os.path.basename(static_path))
        shutil.copyfile(static_path, target)

parser.py

""" parser.py - some functions for parsing markdown files, stripping 
out the yaml metadata, rendering to html
"""

import os
import markdown
import yaml

def load_md(filecontent):
    """ Split a markdown document into its body and its raw metadata header.

        A header is present when the first line starts with "---". It runs
        up to the next line that is exactly "---". Neither delimiter line is
        included in the returned metadata or body.

        Returns a ``(content, raw_meta)`` tuple. Without a header the
        original text is returned unchanged and ``raw_meta`` is "".

        Raises ValueError when a header is opened but never closed.
    """
    lines = filecontent.splitlines()
    if not lines or not lines[0].startswith("---"):
        return filecontent, ""

    try:
        # Search from index 1 so the result is an absolute index into
        # ``lines``; this keeps the slices below free of offset arithmetic.
        end_meta = lines.index("---", 1)
    except ValueError:
        raise ValueError(
            "metadata header opened with '---' but no closing '---' line found"
        )

    # Everything strictly between the two delimiter lines is metadata.
    raw_meta = "\n".join(lines[1:end_meta])
    content = "\n".join(lines[end_meta + 1:])
    return content, raw_meta

def parse_meta(raw_meta):
    """ Parse a raw YAML metadata header into a dictionary.

        An empty header (YAML yields None) becomes an empty dictionary.

        Raises TypeError when a "tags" entry is present but is not a list.
        Errors raised by the YAML parser propagate unchanged.
    """
    # safe_load only builds plain Python values, which is all a metadata
    # header needs. Newer PyYAML releases also reject a bare
    # yaml.load(raw_meta) call because it has no explicit Loader.
    meta = yaml.safe_load(raw_meta)
    if meta is None:
        meta = {}
    # Raise explicitly rather than assert: asserts disappear under
    # ``python -O`` and the tag list would then go unchecked.
    if "tags" in meta and not isinstance(meta["tags"], list):
        raise TypeError(
            "'tags' must be a list, got %s" % type(meta["tags"]).__name__
        )
    return meta

def render_md(md):
    """ Convert markdown text to HTML.

        Returns the result of ``markdown.markdown``; a None result is
        replaced by an empty string so callers always get a string-like
        value back.
    """
    rendered = markdown.markdown(md)
    return "" if rendered is None else rendered

def render_tpl(template, context):
    """ Render ``template`` with ``context`` and return the result.

        ``template`` only needs a ``render`` method that accepts the
        context as its single argument.
    """
    rendered = template.render(context)
    return rendered

def write_out(filecontent, folder, filename):
    """ Write ``filecontent`` to ``filename`` inside ``folder``.

        The target is opened in text write mode, so an existing file is
        overwritten. ``folder`` must already exist.
    """
    target = os.path.join(folder, filename)
    # The context manager closes the handle even when write() raises.
    with open(target, "w") as of:
        of.write(filecontent)

indexer.py

"""
    Indexer handles the file input side of things.
    It has functions for listing files (markdown or static files) and loading.
    It depends on parser.
    It also has transformations for running on metadata and creating ordered structures like tag lists.

"""
import os
import glob
from datetime import datetime
import collections

import parser

def list_md(path=None, verbose=False):
    """ Return the paths of all "*.md" files directly inside ``path``.

        ``path`` defaults to the current working directory. With
        ``verbose`` set, the searched directory is printed first.
    """
    search_root = os.getcwd() if path is None else path
    if verbose:
        print("Searching: " + search_root)
    pattern = os.path.join(search_root, "*.md")
    return glob.glob(pattern)

def list_static(path=None, verbose=False):
    """ Return the paths directly inside ``path`` that count as static files.

        Every entry matched by the "*" glob is kept unless its name ends in
        ".md", ".py" or ".pyc". Nothing else is filtered, so directories
        matched by the glob are returned as well. ``path`` defaults to the
        current working directory.
    """
    root = path if path is not None else os.getcwd()
    if verbose:
        print("Searching: %s for static files" % root)
    skipped_suffixes = (".md", ".py", ".pyc")
    return [
        candidate
        for candidate in glob.glob(os.path.join(root, "*"))
        if not candidate.endswith(skipped_suffixes)
    ]

def load_files(filenames, verbose=False, relpath=""):
    """ Load markdown files and build one context dictionary per file.

        Each file is read, split into body and raw metadata with
        ``parser.load_md``, and the metadata is parsed with
        ``parser.parse_meta``. The parsed metadata is then extended with:

          content - the body rendered by ``parser.render_md``
          outfn   - ``relpath`` joined with the source base name, with the
                    extension replaced by ".html"
          name    - the source base name without extension, only when the
                    metadata does not already define "name"

        A "date" given as a string is converted to a ``datetime`` using the
        YYYY-MM-DD format; other date values are left untouched.

        Returns a dictionary mapping each filename to its context.

        Raises whatever ``parser.parse_meta`` raises for invalid metadata,
        and ValueError for a date string that is not YYYY-MM-DD. In both
        cases the offending filename is printed first.
    """
    everything = {}
    for fn in filenames:
        if verbose:
            print(fn)
        # Context manager so the handle is released even if read() fails.
        with open(fn) as source:
            raw = source.read()
        content, meta = parser.load_md(raw)
        try:
            context = parser.parse_meta(meta)
        except Exception:
            print("Invalid meta-data in: " + fn)
            # Bare raise keeps the original traceback intact.
            raise
        if "date" in context and isinstance(context["date"], str):
            try:
                context["date"] = datetime.strptime(context["date"], "%Y-%m-%d")
            except ValueError:
                print("Invalid date field in: %s, use YYYY-MM-DD" % fn)
                raise

        context["content"] = parser.render_md(content)
        basefn = os.path.splitext(os.path.basename(fn))[0]
        context["outfn"] = os.path.join(relpath, basefn + ".html")
        # Store the fallback name in the context itself: by_tag and
        # by_monthyear read meta["name"] for every entry they link.
        context.setdefault("name", basefn)
        everything[fn] = context
    return everything

def by_tag(everything):
    """ Group entries by tag and return the groups ordered by tag.

        ``everything`` maps keys to metadata dictionaries. Entries without
        a "tags" key are skipped. Every tag maps to
        ``{"name": tag, "links": [...]}`` where each link holds the entry's
        "outfn" as "href" and its "name" as "name". The result is an
        OrderedDict sorted by tag.
    """
    grouped = {}
    for entry in everything.values():
        for tag in entry.get("tags", ()):
            section = grouped.setdefault(tag, {"name": tag, "links": []})
            link = {"href": entry["outfn"], "name": entry["name"]}
            section["links"].append(link)
    ordered = sorted(grouped.items())
    return collections.OrderedDict(ordered)

def by_monthyear(everything):
    """ Group entries by "YYYY-MM" and return the groups ordered by that key.

        ``everything`` maps keys to metadata dictionaries. Entries without
        a "date" key are skipped; the date value must provide ``strftime``
        (``date`` and ``datetime`` objects both do). Every month maps to
        ``{"name": "YYYY-MM", "links": [...]}`` where each link holds the
        entry's "outfn" as "href" and its "name" as "name". The result is
        an OrderedDict sorted by the "YYYY-MM" key.
    """
    sections = {}
    for meta in everything.values():
        if "date" not in meta:
            continue
        # Format through the value's own method. The module imports the
        # datetime class directly, so ``datetime.datetime`` does not exist
        # here, and the instance method also accepts plain date objects.
        monthyear = meta["date"].strftime("%Y-%m")
        section = sections.setdefault(monthyear, {"name": monthyear, "links": []})
        section["links"].append({"href": meta["outfn"], "name": meta["name"]})
    return collections.OrderedDict(sorted(sections.items()))