forked from opencontainers/runc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
notify_socket.go
223 lines (191 loc) · 5.38 KB
/
notify_socket.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
package main
import (
"bytes"
"errors"
"io"
"net"
"os"
"path"
"path/filepath"
"strconv"
"time"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
"golang.org/x/sys/unix"
)
type notifySocket struct {
socket *net.UnixConn
host string
socketPath string
}
func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *notifySocket {
if notifySocketHost == "" {
return nil
}
root := filepath.Join(context.GlobalString("root"), id)
socketPath := filepath.Join(root, "notify", "notify.sock")
notifySocket := ¬ifySocket{
socket: nil,
host: notifySocketHost,
socketPath: socketPath,
}
return notifySocket
}
func (s *notifySocket) Close() error {
return s.socket.Close()
}
// If systemd is supporting sd_notify protocol, this function will add support
// for sd_notify protocol from within the container.
func (s *notifySocket) setupSpec(spec *specs.Spec) {
pathInContainer := filepath.Join("/run/notify", path.Base(s.socketPath))
mount := specs.Mount{
Destination: path.Dir(pathInContainer),
Source: path.Dir(s.socketPath),
Options: []string{"bind", "nosuid", "noexec", "nodev", "ro"},
}
spec.Mounts = append(spec.Mounts, mount)
spec.Process.Env = append(spec.Process.Env, "NOTIFY_SOCKET="+pathInContainer)
}
func (s *notifySocket) bindSocket() error {
addr := net.UnixAddr{
Name: s.socketPath,
Net: "unixgram",
}
socket, err := net.ListenUnixgram("unixgram", &addr)
if err != nil {
return err
}
err = os.Chmod(s.socketPath, 0o777)
if err != nil {
socket.Close()
return err
}
s.socket = socket
return nil
}
func (s *notifySocket) setupSocketDirectory() error {
return os.Mkdir(path.Dir(s.socketPath), 0o755)
}
func notifySocketStart(context *cli.Context, notifySocketHost, id string) (*notifySocket, error) {
notifySocket := newNotifySocket(context, notifySocketHost, id)
if notifySocket == nil {
return nil, nil
}
if err := notifySocket.bindSocket(); err != nil {
return nil, err
}
return notifySocket, nil
}
func (s *notifySocket) waitForContainer(container *libcontainer.Container) error {
state, err := container.State()
if err != nil {
return err
}
return s.run(state.InitProcessPid)
}
func (n *notifySocket) run(pid1 int) error {
if n.socket == nil {
return nil
}
notifySocketHostAddr := net.UnixAddr{Name: n.host, Net: "unixgram"}
client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr)
if err != nil {
return err
}
ticker := time.NewTicker(time.Millisecond * 100)
defer ticker.Stop()
fileChan := make(chan []byte)
go func() {
for {
buf := make([]byte, 4096)
r, err := n.socket.Read(buf)
if err != nil {
return
}
got := buf[0:r]
// systemd-ready sends a single datagram with the state string as payload,
// so we don't need to worry about partial messages.
for _, line := range bytes.Split(got, []byte{'\n'}) {
if bytes.HasPrefix(got, []byte("READY=")) {
fileChan <- line
return
}
}
}
}()
for {
select {
case <-ticker.C:
_, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid1)))
if err != nil {
return nil
}
case b := <-fileChan:
return notifyHost(client, b, pid1)
}
}
}
// notifyHost tells the host (usually systemd) that the container reported READY.
// Also sends MAINPID and BARRIER.
func notifyHost(client *net.UnixConn, ready []byte, pid1 int) error {
_, err := client.Write(append(ready, '\n'))
if err != nil {
return err
}
// now we can inform systemd to use pid1 as the pid to monitor
newPid := "MAINPID=" + strconv.Itoa(pid1)
_, err = client.Write([]byte(newPid + "\n"))
if err != nil {
return err
}
// wait for systemd to acknowledge the communication
return sdNotifyBarrier(client)
}
// errUnexpectedRead is reported when actual data was read from the pipe used
// to synchronize with systemd. Usually, that pipe is only closed.
var errUnexpectedRead = errors.New("unexpected read from synchronization pipe")
// sdNotifyBarrier performs synchronization with systemd by means of the sd_notify_barrier protocol.
func sdNotifyBarrier(client *net.UnixConn) error {
// Create a pipe for communicating with systemd daemon.
pipeR, pipeW, err := os.Pipe()
if err != nil {
return err
}
// Get the FD for the unix socket file to be able to do perform syscall.Sendmsg.
clientFd, err := client.File()
if err != nil {
return err
}
// Send the write end of the pipe along with a BARRIER=1 message.
fdRights := unix.UnixRights(int(pipeW.Fd()))
err = unix.Sendmsg(int(clientFd.Fd()), []byte("BARRIER=1"), fdRights, nil, 0)
if err != nil {
return &os.SyscallError{Syscall: "sendmsg", Err: err}
}
// Close our copy of pipeW.
err = pipeW.Close()
if err != nil {
return err
}
// Expect the read end of the pipe to be closed after 30 seconds.
err = pipeR.SetReadDeadline(time.Now().Add(30 * time.Second))
if err != nil {
return nil
}
// Read a single byte expecting EOF.
var buf [1]byte
n, err := pipeR.Read(buf[:])
if n != 0 || err == nil {
return errUnexpectedRead
} else if errors.Is(err, os.ErrDeadlineExceeded) {
// Probably the other end doesn't support the sd_notify_barrier protocol.
logrus.Warn("Timeout after waiting 30s for barrier. Ignored.")
return nil
} else if err == io.EOF { //nolint:errorlint // comparison with io.EOF is legit.
return nil
} else {
return err
}
}