From 0025bf6827336c859266cf297c41aba678d05502 Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Fri, 2 Jul 2021 12:12:25 -0700
Subject: [PATCH 1/3] libct/user: use []byte more, avoid allocations

Every []byte to string conversion results in a new allocation.
Avoid some by using []byte more.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
(cherry picked from commit 120e3a77d85c9e3fffacd20afb458c10f5dfe946)
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
---
 libcontainer/user/user.go      | 37 +++++++++++++++++-----------------
 libcontainer/user/user_test.go | 16 +++++++--------
 2 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/libcontainer/user/user.go b/libcontainer/user/user.go
index d2c16f7fd36..3e538c80cdc 100644
--- a/libcontainer/user/user.go
+++ b/libcontainer/user/user.go
@@ -2,6 +2,7 @@ package user
 
 import (
 	"bufio"
+	"bytes"
 	"errors"
 	"fmt"
 	"io"
@@ -55,11 +56,11 @@ type IDMap struct {
 	Count    int64
 }
 
-func parseLine(line string, v ...interface{}) {
-	parseParts(strings.Split(line, ":"), v...)
+func parseLine(line []byte, v ...interface{}) {
+	parseParts(bytes.Split(line, []byte(":")), v...)
 }
 
-func parseParts(parts []string, v ...interface{}) {
+func parseParts(parts [][]byte, v ...interface{}) {
 	if len(parts) == 0 {
 		return
 	}
@@ -75,16 +76,16 @@ func parseParts(parts []string, v ...interface{}) {
 		// This is legit.
 		switch e := v[i].(type) {
 		case *string:
-			*e = p
+			*e = string(p)
 		case *int:
 			// "numbers", with conversion errors ignored because of some misbehaving configuration files.
-			*e, _ = strconv.Atoi(p)
+			*e, _ = strconv.Atoi(string(p))
 		case *int64:
-			*e, _ = strconv.ParseInt(p, 10, 64)
+			*e, _ = strconv.ParseInt(string(p), 10, 64)
 		case *[]string:
 			// Comma-separated lists.
-			if p != "" {
-				*e = strings.Split(p, ",")
+			if len(p) != 0 {
+				*e = strings.Split(string(p), ",")
 			} else {
 				*e = []string{}
 			}
@@ -128,8 +129,8 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
 	)
 
 	for s.Scan() {
-		line := strings.TrimSpace(s.Text())
-		if line == "" {
+		line := bytes.TrimSpace(s.Bytes())
+		if len(line) == 0 {
 			continue
 		}
 
@@ -186,8 +187,8 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
 	)
 
 	for s.Scan() {
-		text := s.Text()
-		if text == "" {
+		text := s.Bytes()
+		if len(text) == 0 {
 			continue
 		}
 
@@ -278,7 +279,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (
 
 	// Allow for userArg to have either "user" syntax, or optionally "user:group" syntax
 	var userArg, groupArg string
-	parseLine(userSpec, &userArg, &groupArg)
+	parseLine([]byte(userSpec), &userArg, &groupArg)
 
 	// Convert userArg and groupArg to be numeric, so we don't have to execute
 	// Atoi *twice* for each iteration over lines.
@@ -496,8 +497,8 @@ func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
 	)
 
 	for s.Scan() {
-		line := strings.TrimSpace(s.Text())
-		if line == "" {
+		line := bytes.TrimSpace(s.Bytes())
+		if len(line) == 0 {
 			continue
 		}
 
@@ -549,14 +550,14 @@ func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
 	)
 
 	for s.Scan() {
-		line := strings.TrimSpace(s.Text())
-		if line == "" {
+		line := bytes.TrimSpace(s.Bytes())
+		if len(line) == 0 {
 			continue
 		}
 
 		// see: man 7 user_namespaces
 		p := IDMap{}
-		parseParts(strings.Fields(line), &p.ID, &p.ParentID, &p.Count)
+		parseParts(bytes.Fields(line), &p.ID, &p.ParentID, &p.Count)
 
 		if filter == nil || filter(p) {
 			out = append(out, p)
diff --git a/libcontainer/user/user_test.go b/libcontainer/user/user_test.go
index 23bf46672c5..e1501c85bbf 100644
--- a/libcontainer/user/user_test.go
+++ b/libcontainer/user/user_test.go
@@ -16,42 +16,42 @@ func TestUserParseLine(t *testing.T) {
 		d    int
 	)
 
-	parseLine("", &a, &b)
+	parseLine([]byte(""), &a, &b)
 	if a != "" || b != "" {
 		t.Fatalf("a and b should be empty ('%v', '%v')", a, b)
 	}
 
-	parseLine("a", &a, &b)
+	parseLine([]byte("a"), &a, &b)
 	if a != "a" || b != "" {
 		t.Fatalf("a should be 'a' and b should be empty ('%v', '%v')", a, b)
 	}
 
-	parseLine("bad boys:corny cows", &a, &b)
+	parseLine([]byte("bad boys:corny cows"), &a, &b)
 	if a != "bad boys" || b != "corny cows" {
 		t.Fatalf("a should be 'bad boys' and b should be 'corny cows' ('%v', '%v')", a, b)
 	}
 
-	parseLine("", &c)
+	parseLine([]byte(""), &c)
 	if len(c) != 0 {
 		t.Fatalf("c should be empty (%#v)", c)
 	}
 
-	parseLine("d,e,f:g:h:i,j,k", &c, &a, &b, &c)
+	parseLine([]byte("d,e,f:g:h:i,j,k"), &c, &a, &b, &c)
 	if a != "g" || b != "h" || len(c) != 3 || c[0] != "i" || c[1] != "j" || c[2] != "k" {
 		t.Fatalf("a should be 'g', b should be 'h', and c should be ['i','j','k'] ('%v', '%v', '%#v')", a, b, c)
 	}
 
-	parseLine("::::::::::", &a, &b, &c)
+	parseLine([]byte("::::::::::"), &a, &b, &c)
 	if a != "" || b != "" || len(c) != 0 {
 		t.Fatalf("a, b, and c should all be empty ('%v', '%v', '%#v')", a, b, c)
 	}
 
-	parseLine("not a number", &d)
+	parseLine([]byte("not a number"), &d)
 	if d != 0 {
 		t.Fatalf("d should be 0 (%v)", d)
 	}
 
-	parseLine("b:12:c", &a, &d, &b)
+	parseLine([]byte("b:12:c"), &a, &d, &b)
 	if a != "b" || b != "c" || d != 12 {
 		t.Fatalf("a should be 'b' and b should be 'c', and d should be 12 ('%v', '%v', %v)", a, b, d)
 	}

From 5fd7b3b7b5bdb11d1d3f1684041769fb4fca625b Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Fri, 2 Jul 2021 12:15:44 -0700
Subject: [PATCH 2/3] libct/user: ParseGroupFilter: use TrimSpace

Same as in other places (other parsers here, as well as golang os/user
parser and glibc parser all tolerate extra space at BOL and EOL).

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
(cherry picked from commit 226dfab0bc9f42bbda30bc998eeb32a973fd548d)
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
---
 libcontainer/user/user.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcontainer/user/user.go b/libcontainer/user/user.go
index 3e538c80cdc..a0c8b5d73fe 100644
--- a/libcontainer/user/user.go
+++ b/libcontainer/user/user.go
@@ -187,7 +187,7 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
 	)
 
 	for s.Scan() {
-		text := s.Bytes()
+		text := bytes.TrimSpace(s.Bytes())
 		if len(text) == 0 {
 			continue
 		}

From 0a5d8ba464290488dc20bef1b09173f95d0264fd Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Fri, 2 Jul 2021 12:52:23 -0700
Subject: [PATCH 3/3] libct/user: fix parsing long /etc/group lines

Lines in /etc/group longer than 64 characters breaks the current
implementation of group parser. This is caused by bufio.Scanner
buffer limit.

Fix by re-using the fix for a similar problem in golang os/user,
namely https://go-review.googlesource.com/c/go/+/283601.

Add some tests.

Co-authored-by: Andrey Bokhanko <andreybokhanko@gmail.com>
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
(cherry picked from commit 24d5daf54d0b009e4d1a3c72ee72e308e148471b)
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
---
 libcontainer/user/user.go      | 59 ++++++++++++++++++++++++++--------
 libcontainer/user/user_test.go | 44 ++++++++++++++++++++-----
 2 files changed, 82 insertions(+), 21 deletions(-)

diff --git a/libcontainer/user/user.go b/libcontainer/user/user.go
index a0c8b5d73fe..cc7a106be5d 100644
--- a/libcontainer/user/user.go
+++ b/libcontainer/user/user.go
@@ -180,15 +180,53 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
 	if r == nil {
 		return nil, fmt.Errorf("nil source for group-formatted data")
 	}
+	rd := bufio.NewReader(r)
+	out := []Group{}
+
+	// Read the file line-by-line.
+	for {
+		var (
+			isPrefix  bool
+			wholeLine []byte
+			err       error
+		)
+
+		// Read the next line. We do so in chunks (as much as reader's
+		// buffer is able to keep), check if we read enough columns
+		// already on each step and store final result in wholeLine.
+		for {
+			var line []byte
+			line, isPrefix, err = rd.ReadLine()
 
-	var (
-		s   = bufio.NewScanner(r)
-		out = []Group{}
-	)
+			if err != nil {
+				// We should return no error if EOF is reached
+				// without a match.
+				if err == io.EOF { //nolint:errorlint // comparison with io.EOF is legit, https://github.com/polyfloyd/go-errorlint/pull/12
+					err = nil
+				}
+				return out, err
+			}
 
-	for s.Scan() {
-		text := bytes.TrimSpace(s.Bytes())
-		if len(text) == 0 {
+			// Simple common case: line is short enough to fit in a
+			// single reader's buffer.
+			if !isPrefix && len(wholeLine) == 0 {
+				wholeLine = line
+				break
+			}
+
+			wholeLine = append(wholeLine, line...)
+
+			// Check if we read the whole line already.
+			if !isPrefix {
+				break
+			}
+		}
+
+		// There's no spec for /etc/passwd or /etc/group, but we try to follow
+		// the same rules as the glibc parser, which allows comments and blank
+		// space at the beginning of a line.
+		wholeLine = bytes.TrimSpace(wholeLine)
+		if len(wholeLine) == 0 || wholeLine[0] == '#' {
 			continue
 		}
 
@@ -198,17 +236,12 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
 		//  root:x:0:root
 		//  adm:x:4:root,adm,daemon
 		p := Group{}
-		parseLine(text, &p.Name, &p.Pass, &p.Gid, &p.List)
+		parseLine(wholeLine, &p.Name, &p.Pass, &p.Gid, &p.List)
 
 		if filter == nil || filter(p) {
 			out = append(out, p)
 		}
 	}
-	if err := s.Err(); err != nil {
-		return nil, err
-	}
-
-	return out, nil
 }
 
 type ExecUser struct {
diff --git a/libcontainer/user/user_test.go b/libcontainer/user/user_test.go
index e1501c85bbf..c0c762d6a89 100644
--- a/libcontainer/user/user_test.go
+++ b/libcontainer/user/user_test.go
@@ -1,6 +1,7 @@
 package user
 
 import (
+	"fmt"
 	"io"
 	"reflect"
 	"sort"
@@ -82,12 +83,12 @@ func TestUserParseGroup(t *testing.T) {
 root:x:0:root
 adm:x:4:root,adm,daemon
 this is just some garbage data
-`), nil)
+`+largeGroup()), nil)
 	if err != nil {
 		t.Fatalf("Unexpected error: %v", err)
 	}
-	if len(groups) != 3 {
-		t.Fatalf("Expected 3 groups, got %v", len(groups))
+	if len(groups) != 4 {
+		t.Fatalf("Expected 4 groups, got %v", len(groups))
 	}
 	if groups[0].Gid != 0 || groups[0].Name != "root" || len(groups[0].List) != 1 {
 		t.Fatalf("Expected groups[0] to be 0 - root - 1 member, got %v - %v - %v", groups[0].Gid, groups[0].Name, len(groups[0].List))
@@ -103,16 +104,18 @@ root:x:0:0:root user:/root:/bin/bash
 adm:x:42:43:adm:/var/adm:/bin/false
 111:x:222:333::/var/garbage
 odd:x:111:112::/home/odd:::::
+user7456:x:7456:100:Vasya:/home/user7456
 this is just some garbage data
 `
-	const groupContent = `
+	groupContent := `
 root:x:0:root
 adm:x:43:
-grp:x:1234:root,adm
+grp:x:1234:root,adm,user7456
 444:x:555:111
 odd:x:444:
 this is just some garbage data
-`
+` + largeGroup()
+
 	defaultExecUser := ExecUser{
 		Uid:   8888,
 		Gid:   8888,
@@ -216,6 +219,16 @@ this is just some garbage data
 				Home:  "/home/odd",
 			},
 		},
+		// Test for #3036.
+		{
+			ref: "7456",
+			expected: ExecUser{
+				Uid:   7456,
+				Gid:   100,
+				Sgids: []int{1234, 1000}, // 1000 is largegroup GID
+				Home:  "/home/user7456",
+			},
+		},
 	}
 
 	for _, test := range tests {
@@ -388,13 +401,13 @@ func TestGetAdditionalGroups(t *testing.T) {
 		hasError bool
 	}
 
-	const groupContent = `
+	groupContent := `
 root:x:0:root
 adm:x:43:
 grp:x:1234:root,adm
 adm:x:4343:root,adm-duplicate
 this is just some garbage data
-`
+` + largeGroup()
 	tests := []foo{
 		{
 			// empty group
@@ -444,6 +457,11 @@ this is just some garbage data
 			expected: nil,
 			hasError: true,
 		},
+		{
+			// group with very long list of users
+			groups:   []string{"largegroup"},
+			expected: []int{1000},
+		},
 	}
 
 	for _, test := range tests {
@@ -500,3 +518,13 @@ func TestGetAdditionalGroupsNumeric(t *testing.T) {
 		}
 	}
 }
+
+// Generate a proper "largegroup" entry for group tests.
+func largeGroup() (res string) {
+	var b strings.Builder
+	b.WriteString("largegroup:x:1000:user1")
+	for i := 2; i <= 7500; i++ {
+		fmt.Fprintf(&b, ",user%d", i)
+	}
+	return b.String()
+}