diff --git a/README.md b/README.md
index 60ee2e6..f92514d 100644
--- a/README.md
+++ b/README.md
@@ -116,3 +116,16 @@ You can play with the `xq` utility using the Dockerized environment:
docker-compose run --rm xq
xq /opt/examples/xml/unformatted.xml
```
+
+Output the result as JSON:
+
+```
+cat test/data/xml/unformatted.xml | xq -j
+```
+
+This will output the result in JSON format, preserving the XML structure. The JSON output will be an object where:
+- XML elements become object keys
+- Attributes are prefixed with "@"
+- Text content is stored under "#text" if the element has attributes or child elements
+- Repeated elements are automatically converted to arrays
+- Elements with only text content are represented as strings
diff --git a/cmd/root.go b/cmd/root.go
index 65bc3aa..3ac373e 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -2,15 +2,18 @@ package cmd
import (
"bytes"
+ "encoding/json"
"errors"
"fmt"
- "github.com/sibprogrammer/xq/internal/utils"
- "github.com/spf13/cobra"
- "github.com/spf13/pflag"
"io"
"os"
"path"
"strings"
+
+ "github.com/antchfx/xmlquery"
+ "github.com/sibprogrammer/xq/internal/utils"
+ "github.com/spf13/cobra"
+ "github.com/spf13/pflag"
)
// Version information
@@ -41,6 +44,7 @@ func NewRootCmd() *cobra.Command {
reader = os.Stdin
} else {
+ var err error
if reader, err = os.Open(args[len(args)-1]); err != nil {
return err
}
@@ -61,14 +65,16 @@ func NewRootCmd() *cobra.Command {
if cssAttr != "" && cssQuery == "" {
return errors.New("query option (-q) is missed for attribute selection")
}
+ jsonOutputMode, _ := cmd.Flags().GetBool("json")
pr, pw := io.Pipe()
+ errChan := make(chan error, 1)
go func() {
- defer func() {
- _ = pw.Close()
- }()
+ defer close(errChan)
+ defer pw.Close()
+ var err error
if xPathQuery != "" {
err = utils.XPathQuery(reader, pw, xPathQuery, singleNode, options)
} else if cssQuery != "" {
@@ -76,26 +82,30 @@ func NewRootCmd() *cobra.Command {
} else {
var contentType utils.ContentType
contentType, reader = detectFormat(cmd.Flags(), reader)
-
- switch contentType {
- case utils.ContentHtml:
- err = utils.FormatHtml(reader, pw, indent, colors)
- case utils.ContentXml:
- err = utils.FormatXml(reader, pw, indent, colors)
- case utils.ContentJson:
- err = utils.FormatJson(reader, pw, indent, colors)
- default:
- err = fmt.Errorf("unknown content type: %v", contentType)
+ if jsonOutputMode {
+ err = processAsJSON(cmd.Flags(), reader, pw, contentType)
+ } else {
+ switch contentType {
+ case utils.ContentHtml:
+ err = utils.FormatHtml(reader, pw, indent, colors)
+ case utils.ContentXml:
+ err = utils.FormatXml(reader, pw, indent, colors)
+ case utils.ContentJson:
+ err = utils.FormatJson(reader, pw, indent, colors)
+ default:
+ err = fmt.Errorf("unknown content type: %v", contentType)
+ }
}
}
- if err != nil {
- fmt.Println("Error:", err)
- os.Exit(1)
- }
+ errChan <- err
}()
- return utils.PagerPrint(pr, cmd.OutOrStdout())
+ if err := utils.PagerPrint(pr, cmd.OutOrStdout()); err != nil {
+ return err
+ }
+
+ return <-errChan
},
}
}
@@ -127,6 +137,9 @@ func InitFlags(cmd *cobra.Command) {
"Extract an attribute value instead of node content for provided CSS query")
cmd.PersistentFlags().BoolP("node", "n", utils.GetConfig().Node,
"Return the node content instead of text")
+ cmd.PersistentFlags().BoolP("json", "j", false, "Output the result as JSON")
+ cmd.PersistentFlags().Bool("compact", false, "Compact JSON output (no indentation)")
+ cmd.PersistentFlags().IntP("depth", "d", -1, "Maximum nesting depth for JSON output (-1 for unlimited)")
}
func Execute() {
@@ -193,7 +206,7 @@ func detectFormat(flags *pflag.FlagSet, origReader io.Reader) (utils.ContentType
return utils.ContentHtml, origReader
}
- buf := make([]byte, 10)
+ buf := make([]byte, 20)
length, err := origReader.Read(buf)
if err != nil {
return utils.ContentText, origReader
@@ -211,3 +224,54 @@ func detectFormat(flags *pflag.FlagSet, origReader io.Reader) (utils.ContentType
return utils.ContentXml, reader
}
+
+// processAsJSON renders the already-detected input as JSON on w.
+// XML/HTML is converted via utils.NodeToJSON (honoring the depth flag),
+// JSON input is re-encoded, and anything else is wrapped as {"text": ...}.
+func processAsJSON(flags *pflag.FlagSet, reader io.Reader, w io.Writer, contentType utils.ContentType) error {
+	jsonCompact, _ := flags.GetBool("compact")
+	// The depth flag is registered with default -1 (unlimited), so GetInt
+	// alone suffices; no need to special-case an unchanged flag.
+	jsonDepth, _ := flags.GetInt("depth")
+
+	// result holds the decoded document prior to re-encoding.
+	var result interface{}
+	switch contentType {
+	case utils.ContentXml, utils.ContentHtml:
+		doc, err := xmlquery.Parse(reader)
+		if err != nil {
+			return fmt.Errorf("error while parsing XML: %w", err)
+		}
+		result = utils.NodeToJSON(doc, jsonDepth)
+	case utils.ContentJson:
+		// Decode into interface{} so the document is re-emitted below
+		// with the requested indentation settings.
+		decoder := json.NewDecoder(reader)
+		if err := decoder.Decode(&result); err != nil {
+			return fmt.Errorf("error while parsing JSON: %w", err)
+		}
+	default:
+		// Treat as plain text
+		content, err := io.ReadAll(reader)
+		if err != nil {
+			return fmt.Errorf("error while reading content: %w", err)
+		}
+		result = map[string]interface{}{
+			"text": string(content),
+		}
+	}
+
+	encoder := json.NewEncoder(w)
+	if !jsonCompact {
+		// Pretty-print by default; --compact suppresses indentation.
+		encoder.SetIndent("", " ")
+	}
+
+	// Wrap with %w (not %v) so callers can unwrap the underlying error,
+	// matching the wrapping style used for the parse errors above.
+	if err := encoder.Encode(result); err != nil {
+		return fmt.Errorf("error while encoding JSON: %w", err)
+	}
+
+	return nil
+}
diff --git a/cmd/root_test.go b/cmd/root_test.go
index a22fd48..ac0f31e 100644
--- a/cmd/root_test.go
+++ b/cmd/root_test.go
@@ -2,11 +2,16 @@ package cmd
import (
"bytes"
- "github.com/spf13/cobra"
- "github.com/stretchr/testify/assert"
+ "encoding/json"
+ "fmt"
"path"
"strings"
"testing"
+
+ "github.com/sibprogrammer/xq/internal/utils"
+ "github.com/spf13/cobra"
+ "github.com/spf13/pflag"
+ "github.com/stretchr/testify/assert"
)
func execute(cmd *cobra.Command, args ...string) (string, error) {
@@ -87,3 +92,101 @@ func TestRootCmd(t *testing.T) {
_, err = execute(command, "--indent", "incorrect", xmlFilePath)
assert.ErrorContains(t, err, "invalid argument")
}
+
+func TestProcessAsJSON(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ contentType utils.ContentType
+ flags map[string]interface{}
+ expected map[string]interface{}
+ wantErr bool
+ }{
+ {
+ name: "Simple XML",
+ input: "value",
+ contentType: utils.ContentXml,
+ expected: map[string]interface{}{
+ "root": map[string]interface{}{
+ "child": "value",
+ },
+ },
+ },
+ {name: "Simple JSON",
+ input: `{"root": {"child": "value"}}`,
+ contentType: utils.ContentJson,
+ expected: map[string]interface{}{
+ "root": map[string]interface{}{
+ "child": "value",
+ },
+ },
+ },
+ {
+ name: "Simple HTML",
+ input: "
text
",
+ contentType: utils.ContentHtml,
+ expected: map[string]interface{}{
+ "html": map[string]interface{}{
+ "body": map[string]interface{}{
+ "p": "text",
+ },
+ },
+ },
+ },
+ {
+ name: "Plain text",
+ input: "text",
+ contentType: utils.ContentText,
+ expected: map[string]interface{}{
+ "text": "text",
+ },
+ },
+ {
+ name: "invalid input",
+ input: "thinking>\nI'll analyze each command and its output:\n",
+ wantErr: true,
+ },
+ {
+ name: "combined",
+ expected: map[string]interface{}{
+ "#text": "Thank you\nBye.",
+ "thinking": "1. woop",
+ },
+ input: `Thank you
+
+1. woop
+
+
+Bye.`,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Set up flags
+ flags := pflag.NewFlagSet("test", pflag.ContinueOnError)
+ flags.Bool("compact", false, "")
+ flags.Int("depth", -1, "")
+ for name, v := range tt.flags {
+ _ = flags.Set(name, fmt.Sprint(v))
+ }
+
+ reader := strings.NewReader(tt.input)
+ var output bytes.Buffer
+
+ err := processAsJSON(flags, reader, &output, tt.contentType)
+
+ if tt.wantErr {
+ assert.Error(t, err)
+ } else {
+ assert.NoError(t, err)
+
+ var resultMap map[string]interface{}
+ err = json.Unmarshal(output.Bytes(), &resultMap)
+ assert.NoError(t, err)
+
+ assert.Equal(t, tt.expected, resultMap)
+ }
+ })
+ }
+}
diff --git a/go.mod b/go.mod
index d825608..497a5a6 100644
--- a/go.mod
+++ b/go.mod
@@ -7,6 +7,7 @@ require (
github.com/antchfx/xmlquery v1.4.2
github.com/antchfx/xpath v1.3.2
github.com/fatih/color v1.18.0
+ github.com/google/go-cmp v0.6.0
github.com/spf13/cobra v1.8.1
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.9.0
diff --git a/go.sum b/go.sum
index 7d5cd88..9d041b0 100644
--- a/go.sum
+++ b/go.sum
@@ -14,6 +14,8 @@ github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
diff --git a/internal/utils/contenttype_string.go b/internal/utils/contenttype_string.go
new file mode 100644
index 0000000..a1d3f07
--- /dev/null
+++ b/internal/utils/contenttype_string.go
@@ -0,0 +1,26 @@
+// Code generated by "stringer -type ContentType"; DO NOT EDIT.
+
+package utils
+
+import "strconv"
+
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[ContentXml-0]
+ _ = x[ContentHtml-1]
+ _ = x[ContentJson-2]
+ _ = x[ContentText-3]
+}
+
+const _ContentType_name = "ContentXmlContentHtmlContentJsonContentText"
+
+var _ContentType_index = [...]uint8{0, 10, 21, 32, 43}
+
+func (i ContentType) String() string {
+ if i < 0 || i >= ContentType(len(_ContentType_index)-1) {
+ return "ContentType(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ return _ContentType_name[_ContentType_index[i]:_ContentType_index[i+1]]
+}
diff --git a/internal/utils/gen.go b/internal/utils/gen.go
new file mode 100644
index 0000000..28040f6
--- /dev/null
+++ b/internal/utils/gen.go
@@ -0,0 +1,2 @@
+//go:generate go run golang.org/x/tools/cmd/stringer@latest -type=ContentType
+package utils
diff --git a/internal/utils/jsonutil.go b/internal/utils/jsonutil.go
new file mode 100644
index 0000000..cbbd868
--- /dev/null
+++ b/internal/utils/jsonutil.go
@@ -0,0 +1,125 @@
+package utils
+
+import (
+ "strings"
+
+ "github.com/antchfx/xmlquery"
+)
+
+// NodeToJSON converts an xmlquery.Node tree to a JSON-encodable value.
+// The depth parameter limits how many element levels are descended: 0 yields
+// only the node's concatenated text content, 1 includes direct children but
+// not theirs, and so on; a negative depth (the -1 default) means unlimited.
+func NodeToJSON(node *xmlquery.Node, depth int) interface{} {
+	if node == nil {
+		return nil
+	}
+
+	switch node.Type {
+	case xmlquery.DocumentNode:
+		result := make(map[string]interface{})
+		var textParts []string
+
+		// Leading text may be attached as the document node's next sibling.
+		if node.NextSibling != nil && node.NextSibling.Type == xmlquery.TextNode {
+			text := strings.TrimSpace(node.NextSibling.Data)
+			if text != "" {
+				textParts = append(textParts, text)
+			}
+		}
+
+		// Walk all top-level nodes; repeated element names become arrays.
+		for child := node.FirstChild; child != nil; child = child.NextSibling {
+			switch child.Type {
+			case xmlquery.ElementNode:
+				childResult := nodeToJSONInternal(child, depth)
+				addToResult(result, child.Data, childResult)
+			case xmlquery.TextNode:
+				text := strings.TrimSpace(child.Data)
+				if text != "" {
+					textParts = append(textParts, text)
+				}
+			}
+		}
+
+		if len(textParts) > 0 {
+			result["#text"] = strings.Join(textParts, "\n")
+		}
+		return result
+
+	case xmlquery.ElementNode:
+		return nodeToJSONInternal(node, depth)
+
+	case xmlquery.TextNode:
+		return strings.TrimSpace(node.Data)
+
+	default:
+		return nil
+	}
+}
+
+func nodeToJSONInternal(node *xmlquery.Node, depth int) interface{} {
+ if depth == 0 {
+ return getTextContent(node)
+ }
+
+ result := make(map[string]interface{})
+ for _, attr := range node.Attr {
+ result["@"+attr.Name.Local] = attr.Value
+ }
+
+ var textParts []string
+ for child := node.FirstChild; child != nil; child = child.NextSibling {
+ switch child.Type {
+ case xmlquery.TextNode:
+ text := strings.TrimSpace(child.Data)
+ if text != "" {
+ textParts = append(textParts, text)
+ }
+ case xmlquery.ElementNode:
+ childResult := nodeToJSONInternal(child, depth-1)
+ addToResult(result, child.Data, childResult)
+ }
+ }
+
+ if len(textParts) > 0 {
+ if len(result) == 0 {
+ return strings.Join(textParts, "\n")
+ }
+ result["#text"] = strings.Join(textParts, "\n")
+ }
+
+ return result
+}
+
+func getTextContent(node *xmlquery.Node) string {
+ var parts []string
+ for child := node.FirstChild; child != nil; child = child.NextSibling {
+ switch child.Type {
+ case xmlquery.TextNode:
+ text := strings.TrimSpace(child.Data)
+ if text != "" {
+ parts = append(parts, text)
+ }
+ case xmlquery.ElementNode:
+ parts = append(parts, getTextContent(child))
+ }
+ }
+ return strings.Join(parts, "\n")
+}
+
+func addToResult(result map[string]interface{}, key string, value interface{}) {
+ if key == "" {
+ return
+ }
+ if existing, ok := result[key]; ok {
+ switch existing := existing.(type) {
+ case []interface{}:
+ result[key] = append(existing, value)
+ default:
+ result[key] = []interface{}{existing, value}
+ }
+ } else {
+ result[key] = value
+ }
+}
diff --git a/internal/utils/jsonutil_test.go b/internal/utils/jsonutil_test.go
new file mode 100644
index 0000000..d627ade
--- /dev/null
+++ b/internal/utils/jsonutil_test.go
@@ -0,0 +1,96 @@
+package utils
+
+import (
+ "encoding/json"
+ "strings"
+ "testing"
+
+ "github.com/antchfx/xmlquery"
+ "github.com/google/go-cmp/cmp"
+)
+
+func TestNodeToJSON(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ depth int
+ expected string
+ }{
+ {
+ name: "Simple XML",
+ input: "value",
+ depth: -1,
+ expected: `{"root":{"child":"value"}}`,
+ },
+ {
+ name: "XML with attributes",
+ input: "text",
+ depth: -1,
+ expected: `{"root":{"@attr":"value","child":"text"}}`,
+ },
+ {
+ name: "XML with mixed content",
+ input: "\n text value\n more text\n",
+ depth: -1,
+ expected: `{"root":{"#text":"text\nmore text","child":"value"}}`,
+ },
+ {
+ name: "Depth limited XML",
+ input: "valuetext",
+ depth: 2,
+ expected: `{"root":{"child1":{"grandchild":"value"},"child2":"text"}}`,
+ },
+ {
+ name: "Depth 1 XML",
+ input: "valuetext",
+ depth: 1,
+ expected: `{"root":{"child1":"value","child2":"text"}}`,
+ },
+ {
+ name: "Depth 0 XML",
+ input: "valuetext",
+ depth: 0,
+ expected: `{"root":"value\ntext"}`,
+ },
+ {
+ name: "mixed text and xml",
+ input: `Thank you
+
+1. woop
+
+
+Bye`,
+ expected: `{"#text":"Thank you\nBye","thinking":"1. woop"}`,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ doc, err := xmlquery.Parse(strings.NewReader(tt.input))
+ if err != nil {
+ t.Fatalf("Failed to parse XML: %v", err)
+ }
+
+ result := NodeToJSON(doc, tt.depth)
+ resultJSON, err := json.Marshal(result)
+ if err != nil {
+ t.Fatalf("Failed to marshal result to JSON: %v", err)
+ }
+
+ var resultMap, expectedMap map[string]interface{}
+ err = json.Unmarshal(resultJSON, &resultMap)
+ if err != nil {
+ t.Fatalf("Failed to unmarshal result JSON: %v", err)
+ }
+ err = json.Unmarshal([]byte(tt.expected), &expectedMap)
+ if err != nil {
+ t.Fatalf("Failed to unmarshal expected JSON: %v", err)
+ }
+
+ t.Log(string(resultJSON))
+ if diff := cmp.Diff(expectedMap, resultMap); diff != "" {
+ t.Errorf("NodeToJSON mismatch (-want +got):\n%s", diff)
+ }
+ })
+ }
+}