From f858ec383b3d17f146fb765e5b7385895da5bdd1 Mon Sep 17 00:00:00 2001 From: mntn <85877297+mntn-xyz@users.noreply.github.com> Date: Sat, 25 Dec 2021 11:51:26 -0500 Subject: [PATCH] Allow non-Latin characters in paths This makes gmnhg use [\pL\d\-_] instead of \w to match paths (and [\pL\d_] for org-mode front matter keywords), allowing letters from any Unicode script in file paths; digits are still matched as ASCII only. Fixes #48. --- cmd/gmnhg/main.go | 6 +++--- internal/gmnhg/post.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/gmnhg/main.go b/cmd/gmnhg/main.go index e274c1c..31c0e47 100644 --- a/cmd/gmnhg/main.go +++ b/cmd/gmnhg/main.go @@ -149,9 +149,9 @@ const ( ) var ( - tmplNameRegex = regexp.MustCompile("^" + templateBase + `([\w-_ /]+)\.gotmpl$`) - leafIndexRegex = regexp.MustCompile("^" + contentBase + `([\w-_ /]+)/index\.[\w]+$`) - pagePathRegex = regexp.MustCompile("^" + contentBase + `([\w-_ /]+)/([\w-_ ]+)\.md$`) + tmplNameRegex = regexp.MustCompile("^" + templateBase + `([\pL\d\-_ /]+)\.gotmpl$`) + leafIndexRegex = regexp.MustCompile("^" + contentBase + `([\pL\d\-_ /]+)/index\.[\pL\d_]+$`) + pagePathRegex = regexp.MustCompile("^" + contentBase + `([\pL\d\-_ /]+)/([\pL\d\-_ ]+)\.md$`) ) var hugoConfigFiles = []string{"config.toml", "config.yaml", "config.json"} diff --git a/internal/gmnhg/post.go b/internal/gmnhg/post.go index 4747574..4408673 100644 --- a/internal/gmnhg/post.go +++ b/internal/gmnhg/post.go @@ -62,7 +62,7 @@ var ( yamlDelimiter = []byte("---\n") tomlDelimiter = []byte("+++\n") jsonObjectRegex = regexp.MustCompile(`\A(\{[\s\S]*\})\n\n`) - orgModeRegex = regexp.MustCompile(`\A((?:#\+\w+\[?\]?: ?[^\n\r]*\n)+)`) + orgModeRegex = regexp.MustCompile(`\A((?:#\+[\pL\d_]+\[?\]?: ?[^\n\r]*\n)+)`) ) // ParseMetadata extracts TOML/JSON/YAML/org-mode format front matter