slashtechno · slashtechno · Jul 15, 2024 · Jul 14, 2024 · Jul 14, 2024 · Jul 14, 2024
diff --git a/README.md b/README.md
@@ -1,8 +1,17 @@
-# cross-blogger  
-Headless CMS for static site generators powered by Google's Blogger.
-It can be used, at the time of writing, to publish **between** the following destinations. However, the intention is to output to static site generators, especially Hugo.  
-- Blogger  
-- Markdown (with frontmatter)  
+# Cross Blogger  
+Headless CMS for static site generators (especially Hugo) powered by Google's Blogger.  
+## Introduction  
+The intended use case is to use the "watch" mode to watch a Blogger blog and add its content to a Hugo content directory. When a new post is published on Blogger (including publishing a draft), it is automatically added to the Hugo content directory and can be committed and pushed to a Git repository. Using a continuous deployment service, such as GitHub Actions, the site can be automatically built and deployed with the new post.  
+If enabled, posts which were added automatically can be updated or deleted from the content directory when they are deleted or unpublished on Blogger. Thus, a post can be unpublished, updated, and republished on Blogger, although this assumes that the program is given enough time to detect the unpublishing.  
+An example [repository](https://git.slashtechno.com/slashtechno/test-cross-blogger), hosted on my Gitea instance, contains a Hugo site showcasing posts fetched from a Blogger blog, with workflows set up to automatically build and deploy the site.
+### Other features  
+- LLM-generated descriptions through any OpenAI-compatible API or Ollama.
+  - Ollama can be used with its OpenAI-compatible API or the Ollama REST API.
+- Support for categories and tags. 
+  - To set these in Blogger, labels can be used. For categories, a prefix, such as `category::`, can be used to specify a category. The prefix is removed from the Blogger label and added to a `categories` array in the frontmatter. If the label does not have a prefix, it is added to a `tags` array in the frontmatter.
+- Customizable frontmatter mappings for compatibility with other static site generators or specific themes.
+- Customizable configuration formats (TOML, JSON, YAML, etc.) due to the use of Viper.  
+
 
 ### Installation  
 #### Compiled Binaries  
@@ -14,17 +23,25 @@ Using `go install`, you can compile and add the program to the PATH.
 Either run `go install github.com/slashtechno/cross-blogger@latest`, follow the same process as compiling the program locally, but replace `go build` with `go install`.  
 
 ### Usage  
+#### Configuration  
+**`config.example.toml`** has an example configuration file with comments.
 Sources and destinations should first be configured in the `config.toml` file.  
-By default, `credentials.yaml` is used to store the Google OAuth credentials and `config.toml` is used to store the configuration. These will be generated with placeholders/defaults if they do not exist. You can specify both the path to the credentials file and the path to the config file using the `--credentials-file` and `--config` flags. The file extension will dictate the format of the file. Command-line flags can also be used. Environment variables can be used for credentials and the log level although they should be prefixed with `CROSS_BLOGGER_`. If credentials are not provided through the credentials file **and the refresh token is not passed**, the credentials will be written to the credentials file as a byproduct of the refresh token being stored. It's always possible to just pass the refresh token, once obtained, some other way to prevent the credentials from being written.  
-Docker can be used by placing configuration files in `config/` and running `docker compose up -d` (`-d` runs it in the background). For additional configuration, the `docker-compose.yml` file can be edited.
+By default, `credentials.yaml` is used to store the Google OAuth credentials and `config.toml` is used to store the configuration. These will be generated with placeholders/defaults if they do not exist. You can specify both the path to the credentials file and the path to the config file using the `--credentials-file` and `--config` flags. The file extension will dictate the format of the file. Command-line flags can also be used in some cases. Environment variables can be used for credentials and the log level although they should be prefixed with `CROSS_BLOGGER_`. If credentials are not provided through the credentials file **and the refresh token is not passed**, the credentials will be written to the credentials file as a byproduct of the refresh token being stored. It's always possible to just pass the refresh token, once obtained, some other way to prevent the credentials from being written.  
+Docker can be used by placing configuration files in `config/` and running `docker compose up -d` (`-d` runs the services in the background). Edit the `docker-compose.yml` file to allow the program to access any directories you want to use, such as the directory where markdown files should be created.
+#### Watching a source  
+Currently, the only source is Blogger. To watch a Blogger blog, run `cross-blogger publish watch blogger <Blogger URL> <destination>`. Multiple destinations can be set by separating them with spaces. The Blogger URL should be the URL of the blog, not a specific post. The destination should be the name of the destination specified in the config file.  
+When watching a source, the program will fetch posts every 30 seconds. This can be changed with the `--interval` flag (or in the config file). The interval should be any duration parsable by Go's `time.ParseDuration` function, such as `30s`, `1m`, or `1h30m`.  
+Running with Docker is recommended for watching a source, as it allows the program to easily run in the background and start on boot.
+You can commit and push the changes to a Git repository by setting `git_dir` in the destination configuration.  
+
 #### Help Output  
-From `cross-blogger publish --help` (run `cross-blogger --help` for the root help output):  
+From `cross-blogger publish --help`:    
 ```text
 Publish to a destination from a source. 
-        Specify the source with the first positional argument. 
+        Specify the source with the first positional argument.
         The second positional argument is the specifier, such as a Blogger post URL or a file path.
         All arguments after the first are treated as destinations.
-        Destinations should be the name of the destinations specified in the config file
+        Destinations should be the name of the destinations specified in the config file 
 
 Usage:
   cross-blogger publish [flags]
@@ -39,11 +56,42 @@ Flags:
       --google-client-secret string   Google OAuth client secret
       --google-refresh-token string   Google OAuth refresh token
   -h, --help                          help for publish
+      --llm-api-key string            OpenAI API key
+      --llm-base-url string           Base URL
+      --llm-model string              LLM model to use for OpenAI-compatible platforms   
+      --llm-provider string           LLM platform ("openai" or "ollama")
 
 Global Flags:
       --config string             config file path (default "config.toml")
-      --credentials-file string   credentials file path (default "credentials.yaml")
+      --credentials-file string   credentials file path (default "credentials.yaml")     
       --log-level string          Set the log level
 
-Use "cross-blogger publish [command] --help" for more information about a command.
+Use "cross-blogger publish [command] --help" for more information about a command. 
 ```  
+From `cross-blogger publish watch --help`:  
+```text
+Act as a headless CMS of sorts by watching a source for new content and publishing it to configured destinations.
+        Specify the source with the first positional argument.
+        The second positional argument and on are treated as destination names.
+        Ensure that these are configured in the config file.
+
+Usage:
+  cross-blogger publish watch [flags]
+
+Flags:
+  -h, --help              help for watch
+  -i, --interval string   Interval to check for new content (default "30s")
+
+Global Flags:
+      --config string                 config file path (default "config.toml")
+      --credentials-file string       credentials file path (default "credentials.yaml") 
+      --dry-run                       Dry run - don't actually push the data
+      --google-client-id string       Google OAuth client ID
+      --google-client-secret string   Google OAuth client secret
+      --google-refresh-token string   Google OAuth refresh token
+      --llm-api-key string            OpenAI API key
+      --llm-base-url string           Base URL
+      --llm-model string              LLM model to use for OpenAI-compatible platforms   
+      --llm-provider string           LLM platform ("openai" or "ollama")
+      --log-level string              Set the log level
+```
diff --git a/cmd/publish.go b/cmd/publish.go
@@ -21,23 +21,6 @@ var publishCmd = &cobra.Command{
 	// Arg 2: Specifier
 	// Arg 3+: Destinations
 	Args: cobra.MinimumNArgs(3),
-	// PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
-	// 	var err error
-	// 	var redisOptions *redis.Options
-	// 	if redisOptions, err = internal.InitializeRedisOptions(internal.CredentialViper.GetStringMap("db")); err != nil {
-	// 		return err
-	// 	}
-	// 	// https://github.com/spf13/viper?tab=readme-ov-file#accessing-nested-keys
-	// 	if !internal.CredentialViper.GetBool("db.enable") {
-	// 		log.Debug("DB is disabled")
-	// 		return nil
-	// 	}
-	// 	if err := internal.InitializeDb("redis", redisOptions); err != nil {
-	// 		return err
-	// 	}
-
-	// 	return nil
-	// },
 	Run: func(cmd *cobra.Command, args []string) {
 		destinations := internal.ConfigViper.Get("destinations")
 		sources := internal.ConfigViper.Get("sources")
@@ -190,6 +173,9 @@ func pushToDestinations(postData platforms.PostData, destinationSlice []platform
 		var options platforms.PushPullOptions
 		switch destination.GetType() {
 		case "markdown":
+			// No runtime options for Markdown
+			// Filepath is generated by turning the title into a URL-friendly slug
+			// The content directory is part of the Markdown struct
 			options = platforms.PushPullOptions{}
 
 		case "blogger":

diff --git a/cmd/root.go b/cmd/root.go
@@ -16,7 +16,6 @@ var RootCmd = &cobra.Command{
 	Short: "A utility to cross-publish content between different platforms",
 	Long: `cross-blogger is a utility to cross-publish content between different platforms.
 	By default, the files for storing credentials and configuration are separate.`,
-
 	// Uncomment the following line if your bare application
 	// has an action associated with it:
 	// Run: func(cmd *cobra.Command, args []string) { },

diff --git a/cmd/watch.go b/cmd/watch.go
@@ -79,23 +79,21 @@ var watchCmd = &cobra.Command{
 		// This will send new posts to postChan and errors to errChan
 		wg.Add(1)
 		go watcher.Watch(&wg, internal.ConfigViper.GetDuration("interval"), options, postChan, errChan)
-		// Assert that the source is Blogger
-		blogger, ok := source.(*platforms.Blogger)
 		if ok {
 			for _, dest := range destinationSlice {
 				if markdownDest, ok := dest.(*platforms.Markdown); ok {
 					// Check if overwriting is enabled
 					if markdownDest.Overwrite {
 						wg.Add(1)
-						go blogger.CleanMarkdownPosts(&wg, internal.ConfigViper.GetDuration("interval"), markdownDest, options, errChan)
+						go watcher.CleanMarkdownPosts(&wg, internal.ConfigViper.GetDuration("interval"), markdownDest, options, errChan)
 					} else {
 						log.Debug("Overwriting is disabled; not cleaning up posts", "destination", dest.GetName())
 					}
 				} else {
 					log.Debug("Destination is not Markdown; not cleaning up posts", "destination", dest.GetName())
 				}
 			}
-			
+
 		}
 		wg.Add(1)
 		go func() {

diff --git a/config.example.toml b/config.example.toml
@@ -1,3 +1,4 @@
+# interval is how often the watch subcommand should check for new posts. It should be represented as a string that can be parsed by Go's time.ParseDuration function.
 # type is a required field that specifies the type of the source or destination.
 # name is the name of the source or destination. It is ud to refer to the source or destination when running the command.
 # overwrite is a boolean field that specifies whether to overwrite the file/post if it already exists. This is done by removing old files/posts that have the same title.
@@ -6,34 +7,52 @@
 # git_dir, if set, will be used to push files to a Git repository. Normally, if you're running something like Hugo, this would be the root directory of your Hugo site (the top-level directory that contains .git)
 # frontmatter_mapping is a table that can be used to customize the frontmatter (metadata). This is useful if your Hugo theme uses different frontmatter keys or if it's a frontmatter key that's not "officially" supported by Hugo and it's up to the theme to decide what key to use. I use Hugo as an example but in reality, this option could probably be used to make this compatible with any static site generator that uses frontmatter.
 # generate_llm_descriptions is used to utilize Large Language Models to generate descriptions for Blogger posts as they don't have a description field accessible via the API.
+# category_prefix dictates the prefix that Blogger labels should have to be turned into Hugo categories. For example, if you have a label called "category::foo" and the category_prefix is "category::", then the categories will be ["foo"]. Any labels that don't have the prefix will be turned into tags.
 [[destinations]]
-name = 'markdown'
+interval = '30s'
+log_level = 'info'
+
+[[destinations]]
+blog_url = 'https://example.com'
+name = 'blog'
+overwrite = false
+type = 'blogger'
+
+[[destinations]]
+content_dir = '/hugo-site/content/blog'
+git_dir = '/hugo-site'
+name = 'otherblog'
+overwrite = false
 type = 'markdown'
-content_dir = 'output_markdown'
-git_dir = 'output_markdown'
-overwrite = true
+
 [destinations.frontmatter_mapping]
-canonical_url = ''
+canonical_url = 'canonicalURL'
+categories = 'categories'
 date = 'date'
 date_updated = 'lastmod'
+description = 'description'
+managed = 'managedByCrossBlogger'
+tags = 'tags'
 title = 'title'
-[[destinations]]
-type = 'blogger'
-name = 'blogger'
-blog_url = 'https://example.com'
-overwrite = true
+
 [[sources]]
 blog_url = 'https://example.com'
-name = 'blogger'
-type = 'blogger'
+category_prefix = 'category::'
 generate_llm_descriptions = true
+name = 'someblog'
+type = 'blogger'
+
 [[sources]]
-name = 'markdown'
+content_dir = 'content'
+name = 'aBlogInMarkdown'
 type = 'markdown'
-content_dir = 'input_markdown'
 
 [sources.frontmatter_mapping]
 canonical_url = 'canonicalURL'
+categories = 'categories'
 date = 'date'
 date_updated = 'lastmod'
-title = 'thisisatitle'
+description = 'description'
+managed = 'managedByCrossBlogger'
+tags = 'tags'
+title = 'title'
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -4,6 +4,19 @@ services:
     build:
       context: .
       dockerfile: Dockerfile
+    restart: unless-stopped
     volumes:
       - ./config:/app/config
-    command: ["--config", "/app/config/config.toml", "--credentials-file", "/app/config/credentials.yaml", "publish", "watch"]
+      # Markdown directory
+      - /path/to/hugo/site:/app/output_markdown
+    command: [
+      "--config", 
+      "/app/config/config.toml", 
+      "--credentials-file", 
+      "/app/config/credentials.yaml",
+      "publish", "watch", 
+      # Source
+      "blogger", 
+      # Destination(s)
+      "markdown"
+      ]