Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

client gets stuck when device is ssh unreachable #53

Closed
naveenachyuta opened this issue Jun 13, 2023 · 0 comments · Fixed by #54
Closed

client gets stuck when device is ssh unreachable #53

naveenachyuta opened this issue Jun 13, 2023 · 0 comments · Fixed by #54
Labels
bug Something isn't working

Comments

@naveenachyuta
Copy link
Collaborator

naveenachyuta commented Jun 13, 2023

When the device is ssh unreachable, and the netconf client tries to connect to the device, it gets stuck when it tries to create a new ssh client connection
https://github.com/nemith/netconf/blob/main/transport/ssh/ssh.go#L43

Attaching the script to reproduce the issue.
Run the script with 10 connections. the script has a sleep of 1000 seconds so that the connections are open the entire time. Run a duplicate script with one connection and you should be able to see that it gets stuck

stack

SIGABRT: abort
PC=0x119d2d3 m=0 sigcode=0

goroutine 1 [running]:
main.main()
	/Users/nachyuta/neteng_telemetry_collection/cmd/test/testing/testing1/netconf_testing copy.go:83 +0x153 fp=0xc0000e7f80 sp=0xc0000e7f50 pc=0x119d2d3
runtime.main()
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/proc.go:250 +0x212 fp=0xc0000e7fe0 sp=0xc0000e7f80 pc=0x1037852
runtime.goexit()
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/asm_amd64.s:1594 +0x1 fp=0xc0000e7fe8 sp=0xc0000e7fe0 pc=0x10645a1

goroutine 2 [force gc (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/proc.go:363 +0xd6 fp=0xc000056fb0 sp=0xc000056f90 pc=0x1037c16
runtime.goparkunlock(...)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/proc.go:369
runtime.forcegchelper()
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/proc.go:302 +0xad fp=0xc000056fe0 sp=0xc000056fb0 pc=0x1037aad
runtime.goexit()
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/asm_amd64.s:1594 +0x1 fp=0xc000056fe8 sp=0xc000056fe0 pc=0x10645a1
created by runtime.init.6
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/proc.go:290 +0x25

goroutine 3 [GC sweep wait]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/proc.go:363 +0xd6 fp=0xc000057790 sp=0xc000057770 pc=0x1037c16
runtime.goparkunlock(...)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/proc.go:369
runtime.bgsweep(0x0?)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/mgcsweep.go:278 +0x8e fp=0xc0000577c8 sp=0xc000057790 pc=0x1024f6e
runtime.gcenable.func1()
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/mgc.go:178 +0x26 fp=0xc0000577e0 sp=0xc0000577c8 pc=0x1019e26
runtime.goexit()
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/asm_amd64.s:1594 +0x1 fp=0xc0000577e8 sp=0xc0000577e0 pc=0x10645a1
created by runtime.gcenable
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/mgc.go:178 +0x6b

goroutine 4 [GC scavenge wait]:
runtime.gopark(0xc00007e000?, 0x12432e0?, 0x1?, 0x0?, 0x0?)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/proc.go:363 +0xd6 fp=0xc000057f70 sp=0xc000057f50 pc=0x1037c16
runtime.goparkunlock(...)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/proc.go:369
runtime.(*scavengerState).park(0x1357ae0)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/mgcscavenge.go:389 +0x53 fp=0xc000057fa0 sp=0xc000057f70 pc=0x1023013
runtime.bgscavenge(0x0?)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/mgcscavenge.go:617 +0x45 fp=0xc000057fc8 sp=0xc000057fa0 pc=0x10235e5
runtime.gcenable.func2()
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/mgc.go:179 +0x26 fp=0xc000057fe0 sp=0xc000057fc8 pc=0x1019dc6
runtime.goexit()
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/asm_amd64.s:1594 +0x1 fp=0xc000057fe8 sp=0xc000057fe0 pc=0x10645a1
created by runtime.gcenable
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/mgc.go:179 +0xaa

goroutine 18 [finalizer wait]:
runtime.gopark(0x1357ee0?, 0xc0001841a0?, 0x0?, 0x0?, 0xc000056770?)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/proc.go:363 +0xd6 fp=0xc000056628 sp=0xc000056608 pc=0x1037c16
runtime.goparkunlock(...)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/proc.go:369
runtime.runfinq()
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/mfinal.go:180 +0x10f fp=0xc0000567e0 sp=0xc000056628 pc=0x1018f2f
runtime.goexit()
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/asm_amd64.s:1594 +0x1 fp=0xc0000567e8 sp=0xc0000567e0 pc=0x10645a1
created by runtime.createfing
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/mfinal.go:157 +0x45

goroutine 19 [IO wait]:
runtime.gopark(0x8?, 0xc0001a000a?, 0x1?, 0x0?, 0x0?)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/proc.go:363 +0xd6 fp=0xc0000d59d0 sp=0xc0000d59b0 pc=0x1037c16
runtime.netpollblock(0x106ec29?, 0x1?, 0x0?)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/netpoll.go:526 +0xf7 fp=0xc0000d5a08 sp=0xc0000d59d0 pc=0x1031157
internal/poll.runtime_pollWait(0x251af08, 0x72)
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/netpoll.go:305 +0x89 fp=0xc0000d5a28 sp=0xc0000d5a08 pc=0x105ee69
internal/poll.(*pollDesc).wait(0xc000026100?, 0xc0001a000a?, 0x0)
	/usr/local/Cellar/go/1.19.1/libexec/src/internal/poll/fd_poll_runtime.go:84 +0x32 fp=0xc0000d5a50 sp=0xc0000d5a28 pc=0x10add52
internal/poll.(*pollDesc).waitRead(...)
	/usr/local/Cellar/go/1.19.1/libexec/src/internal/poll/fd_poll_runtime.go:89
internal/poll.(*FD).Read(0xc000026100, {0xc0001a000a, 0x1, 0x1})
	/usr/local/Cellar/go/1.19.1/libexec/src/internal/poll/fd_unix.go:167 +0x25a fp=0xc0000d5ad0 sp=0xc0000d5a50 pc=0x10ae89a
net.(*netFD).Read(0xc000026100, {0xc0001a000a?, 0x1010000000001?, 0x17c2868?})
	/usr/local/Cellar/go/1.19.1/libexec/src/net/fd_posix.go:55 +0x29 fp=0xc0000d5b18 sp=0xc0000d5ad0 pc=0x10e6ba9
net.(*conn).Read(0xc000194000, {0xc0001a000a?, 0xc0000d5b98?, 0x100e5a7?})
	/usr/local/Cellar/go/1.19.1/libexec/src/net/net.go:183 +0x45 fp=0xc0000d5b60 sp=0xc0000d5b18 pc=0x10f16a5
io.ReadAtLeast({0x255a030, 0xc000194000}, {0xc0001a000a, 0x1, 0x1}, 0x1)
	/usr/local/Cellar/go/1.19.1/libexec/src/io/io.go:332 +0x9a fp=0xc0000d5ba8 sp=0xc0000d5b60 pc=0x108301a
io.ReadFull(...)
	/usr/local/Cellar/go/1.19.1/libexec/src/io/io.go:351
golang.org/x/crypto/ssh.readVersion({0x255a030, 0xc000194000})
	/Users/nachyuta/neteng_telemetry_collection/vendor/golang.org/x/crypto/ssh/transport.go:322 +0xc5 fp=0xc0000d5c30 sp=0xc0000d5ba8 pc=0x1196e05
golang.org/x/crypto/ssh.exchangeVersions({0x255a008?, 0xc000194000}, {0xc0001a0000?, 0xa?, 0x1740a68?})
	/Users/nachyuta/neteng_telemetry_collection/vendor/golang.org/x/crypto/ssh/transport.go:306 +0x117 fp=0xc0000d5c88 sp=0xc0000d5c30 pc=0x1196cf7
golang.org/x/crypto/ssh.(*connection).clientHandshake(0xc00019e080, {0xc000020090, 0x13}, 0xc000196000)
	/Users/nachyuta/neteng_telemetry_collection/vendor/golang.org/x/crypto/ssh/client.go:100 +0x126 fp=0xc0000d5d10 sp=0xc0000d5c88 pc=0x1180606
golang.org/x/crypto/ssh.NewClientConn({0x1246810, 0xc000194000}, {0xc000020090, 0x13}, 0xc00009c9c0)
	/Users/nachyuta/neteng_telemetry_collection/vendor/golang.org/x/crypto/ssh/client.go:83 +0x131 fp=0xc0000d5d78 sp=0xc0000d5d10 pc=0x1180371
github.com/nemith/netconf/transport/ssh.Dial({0x12457a8?, 0xc000102060?}, {0x11e8e05?, 0x1?}, {0xc000020090, 0x13}, 0xc00009c9c0)
	/Users/nachyuta/neteng_telemetry_collection/vendor/github.com/nemith/netconf/transport/ssh/ssh.go:43 +0xc7 fp=0xc0000d5e20 sp=0xc0000d5d78 pc=0x119c047
main.Open({0xc000020090, 0x13}, 0xc000069f90?)
	/Users/nachyuta/neteng_telemetry_collection/cmd/test/testing/testing1/netconf_testing copy.go:23 +0x15c fp=0xc0000d5f08 sp=0xc0000d5e20 pc=0x119cbfc
main.main.func1()
	/Users/nachyuta/neteng_telemetry_collection/cmd/test/testing/testing1/netconf_testing copy.go:88 +0x248 fp=0xc0000d5fe0 sp=0xc0000d5f08 pc=0x119d548
runtime.goexit()
	/usr/local/Cellar/go/1.19.1/libexec/src/runtime/asm_amd64.s:1594 +0x1 fp=0xc0000d5fe8 sp=0xc0000d5fe0 pc=0x10645a1
created by main.main
	/Users/nachyuta/neteng_telemetry_collection/cmd/test/testing/testing1/netconf_testing copy.go:83 +0x108

rax    0x1
rbx    0x1357ee0
rcx    0x1357ee0
rdx    0x1
rdi    0xc00009c9c0
rsi    0x7ffeefbff828
rbp    0xc0000e7f70
rsp    0xc0000e7f50
r8     0x1357ee0
r9     0xc00005aca0
r10    0xc00005ac98
r11    0xc00005ac9c
r12    0x7ffeefbff590
r13    0x178
r14    0xc0000061a0
r15    0x2204575
rip    0x119d2d3
rflags 0x246
cs     0x2b
fs     0x0
gs     0x0
exit status 2

** test program **

package main

import (
	"context"
	"fmt"
	"log"
	"os"
	"time"

	"github.com/nemith/netconf"
	ncssh "github.com/nemith/netconf/transport/ssh"
	"golang.org/x/crypto/ssh"
)

func Open(addr string, sshConfig *ssh.ClientConfig) (*netconf.Session, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	tr, err := ncssh.Dial(ctx, "tcp", addr, sshConfig)
	if err != nil {
		return nil, fmt.Errorf("failed to create ssh transport: %w", err)
	}

	sess, err := netconf.Open(tr)
	if err != nil {
		return nil, fmt.Errorf("failed to open netconf session: %w", err)
	}

	return sess, nil
}

func Close(sess *netconf.Session) error {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	return sess.Close(ctx)
}

func Call(req string, timeout time.Duration, sess *netconf.Session) {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	reply, err := sess.Do(ctx, &netconf.RPCMsg{
		Operation: req,
	})

	if err != nil {
		log.Printf("failed to send netconf request %q: %v", req, err)
	}
	log.Printf("got reply:\n%s", reply.Body)
}

func main() {
	username := os.Getenv("USER")
	password := os.Getenv("PASS")
	host := os.Getenv("NCHOST")

	sshConfig := &ssh.ClientConfig{
		User: username,
		Auth: []ssh.AuthMethod{
			ssh.Password(password),
		},
		HostKeyCallback: ssh.InsecureIgnoreHostKey(),
		Timeout:         5 * time.Second,
	}

	for i := 0; i < 11; i++ {
		go func() {
			for {
				sess, err := Open(host, sshConfig)
				if err != nil {
					log.Fatal(err)
				}
				Call("<command>show version</command>", 10*time.Second, sess)
				time.Sleep(1000 * time.Second)
				Close(sess)
			}
		}()
	}

	// wait forever
	wait := make(chan struct{})
	<-wait
}
@naveenachyuta naveenachyuta added the bug Something isn't working label Jun 13, 2023
nemith added a commit that referenced this issue Jun 15, 2023
`ssh.Dial()` took in a context that was used to establish the tcp
connection, however that context doesn't cover the ssh handshake which
can easily block indefinitely. This approximates context support for
ssh.NewClientConn() by having a go routine listen for context
cancellation and closing the connection. We can then check for ctx.Err()
and return that (i.e if the context was canceled).

Note that there is a `Timeout` field in `ssh.ClientConfig` but that also
only covers the TCP connection. See
golang/go#51926

Fixes: #53
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

Successfully merging a pull request may close this issue.

1 participant