infrastructure framework

jukent · jukent · commit df6a12b46864 · 2025-07-11T16:07:51.000-06:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,7 +7,6 @@ repos:
       - id: check-docstring-first
       - id: check-json
       - id: check-yaml
-      - id: double-quote-string-fixer
 
   - repo: https://github.com/psf/black
     rev: 25.1.0
diff --git a/environment.yml b/environment.yml
@@ -13,6 +13,7 @@ dependencies:
 - numpy
 - matplotlib
 - google-api-python-client
+- feedgen
 - pip
 - pip:
     - google-analytics-data
diff --git a/portal/myst.yml b/portal/myst.yml
@@ -6,26 +6,34 @@ project:
   id: 770e49e5-344a-4c46-adaa-3afb060b2085
   authors: Project Pythia Community
   github: https://github.com/projectpythia/projectpythia.github.io
+  plugins:
+    - type: executable
+      path: src/blogpost.py
 
   toc:
     - file: index.md
     - file: about.md
-    - title: Blog
+    - file: posts/blog.md
       children:
-        # - pattern: posts/*.md
-        #  Temporary until we have blog infrastructure: explicit list of posts by date (newest first)
+      - title: "2025"
+        children:
         - file: posts/2025/mystification.md
         - file: posts/2025/cookoff2025-website.md
         - file: posts/2025/binderhub_status.md
         - file: posts/2025/new-cookbooks.md
+      - title: "2024"
+        children:
         - file: posts/2024/cookoff2024-website.md
+      - title: "2023"
+        children:
         - file: posts/2023/cookoff2024-savethedate.md
         - file: posts/2023/fundraiser.md
         - file: posts/2023/cookoff2023.md
     - file: contributing.md
     - file: cookbook-guide.md
     - file: quick-cookbook-guide.md
     - file: metrics.md
+
 site:
   domains: []
   options:
diff --git a/portal/posts/blog.md b/portal/posts/blog.md
@@ -0,0 +1,8 @@
+# Blog
+
+Below are a few of the latest posts on the Project Pythia blog.
+You can see a full list by year to the left.
+
+:::{postlist}
+:number: 25
+:::
diff --git a/portal/src/blogpost.py b/portal/src/blogpost.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+import argparse
+import json
+import sys
+from pathlib import Path
+
+import pandas as pd
+import unist as u
+from feedgen.feed import FeedGenerator
+from yaml import safe_load
+
+DEFAULTS = {"number": 10}
+
+root = Path(__file__).parent.parent
+
+# Aggregate all posts from the markdown and ipynb files
+posts = []
+for ifile in root.rglob("posts/**/*.md"):
+    if "drafts" in str(ifile):
+        continue
+
+    text = ifile.read_text()
+    try:
+        _, meta, content = text.split("---", 2)
+    except Exception:
+        print(f"Skipping file with error: {ifile}", file=sys.stderr)
+        continue
+
+    # Load in YAML metadata
+    meta = safe_load(meta)
+    meta["path"] = ifile.relative_to(root).with_suffix("")
+    if "title" not in meta:
+        lines = text.splitlines()
+        for ii in lines:
+            if ii.strip().startswith("#"):
+                meta["title"] = ii.replace("#", "").strip()
+                break
+
+    # Summarize content
+    skip_lines = ["#", "--", "%", "++"]
+    content = "\n".join(
+        ii
+        for ii in content.splitlines()
+        if not any(ii.startswith(char) for char in skip_lines)
+    )
+    N_WORDS = 50
+    words = " ".join(content.split(" ")[:N_WORDS])
+    meta["content"] = meta.get("description", words)
+    posts.append(meta)
+posts = pd.DataFrame(posts)
+posts["date"] = pd.to_datetime(posts["date"]).dt.tz_localize("US/Pacific")
+posts = posts.dropna(subset=["date"])
+posts = posts.sort_values("date", ascending=False)
+
+# Generate an RSS feed
+fg = FeedGenerator()
+fg.id("https://projectpythia.org/")
+fg.title("Project Pythia blog")
+fg.author({"name": "Project Pythia Team", "email": "projectpythia@ucar.edu"})
+fg.link(href="https://projectpythia.org/", rel="alternate")
+fg.logo("_static/images/logos/pythia_logo-blue-btext.svg")
+fg.subtitle("")
+fg.link(href="http://chrisholdgraf.com/rss.xml", rel="self")
+fg.language("en")
+
+# Add all my posts to it
+for ix, irow in posts.iterrows():
+    fe = fg.add_entry()
+    fe.id(f'https://projectpythia.org/{irow["path"]}')
+    fe.published(irow["date"])
+    fe.title(irow["title"])
+    fe.link(href=f'https://projectpythia.org/{irow["path"]}')
+    fe.content(content=irow["content"])
+
+# Write an RSS feed with latest posts
+fg.atom_file(root / "atom.xml", pretty=True)
+fg.rss_file(root / "rss.xml", pretty=True)
+
+plugin = {
+    "name": "Blog Post list",
+    "directives": [
+        {
+            "name": "postlist",
+            "doc": "An example directive for showing a nice random image at a custom size.",
+            "alias": ["bloglist"],
+            "arg": {},
+            "options": {
+                "number": {
+                    "type": "int",
+                    "doc": "The number of posts to include",
+                }
+            },
+        }
+    ],
+}
+
+children = []
+for ix, irow in posts.iterrows():
+    children.append(
+        {
+            "type": "card",
+            "url": f'/{irow["path"].with_suffix("")}',
+            "children": [
+                {"type": "cardTitle", "children": [u.text(irow["title"])]},
+                {"type": "paragraph", "children": [u.text(irow["content"])]},
+                {
+                    "type": "footer",
+                    "children": [
+                        u.strong([u.text("Date: ")]),
+                        u.text(f'{irow["date"]:%B %d, %Y} | '),
+                        u.strong([u.text("Author: ")]),
+                        u.text(f'{irow["author"]}'),
+                    ],
+                },
+            ],
+        }
+    )
+
+
+def declare_result(content):
+    """Declare result as JSON to stdout
+
+    :param content: content to declare as the result
+    """
+
+    # Format result and write to stdout
+    json.dump(content, sys.stdout, indent=2)
+    # Successfully exit
+    raise SystemExit(0)
+
+
+def run_directive(name, data):
+    """Execute a directive with the given name and data
+
+    :param name: name of the directive to run
+    :param data: data of the directive to run
+    """
+    assert name == "postlist"
+    opts = data["node"].get("options", {})
+    number = int(opts.get("number", DEFAULTS["number"]))
+    output = children[:number]
+    return output
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument("--role")
+    group.add_argument("--directive")
+    group.add_argument("--transform")
+    args = parser.parse_args()
+
+    if args.directive:
+        data = json.load(sys.stdin)
+        declare_result(run_directive(args.directive, data))
+    elif args.transform:
+        raise NotImplementedError
+    elif args.role:
+        raise NotImplementedError
+    else:
+        declare_result(plugin)
diff --git a/setup.cfg b/setup.cfg
@@ -7,7 +7,7 @@ select = B,C,E,F,W,T4,B9
 
 [isort]
 known_first_party=
-known_third_party=
+known_third_party=feedgen,pandas,unist,yaml
 multi_line_output=3
 include_trailing_comma=True
 force_grid_wrap=0