Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

net: apparent deadlock in TestVariousDeadlines on arm64 #41863

Closed
bcmills opened this issue Oct 8, 2020 · 10 comments
Closed

net: apparent deadlock in TestVariousDeadlines on arm64 #41863

bcmills opened this issue Oct 8, 2020 · 10 comments
Labels
arch-arm64 FrozenDueToAge NeedsInvestigation Someone must examine and confirm this is a valid issue and not a duplicate of an existing one. WaitingForInfo Issue is not actionable because of missing required information, which needs to be provided.
Milestone

Comments

@bcmills
Copy link
Contributor

bcmills commented Oct 8, 2020

2020-10-07T20:07:07-83dfc0d/linux-arm64-aws
2020-09-30T05:39:59-3caaadd/linux-arm64-aws
2020-09-25T18:07:42-74c3b50/linux-arm64-aws
2020-09-24T18:18:20-4285094/linux-arm64-aws
2020-09-18T17:21:06-d91d076/linux-arm64-aws
2020-09-16T16:07:00-37aa653/linux-arm64-aws
2020-09-13T19:17:09-66e66e7/linux-arm64-aws
2020-09-11T14:22:17-6e3df74/linux-arm64-aws
2020-08-25T20:04:27-8381408/linux-arm64-aws

panic: test timed out after 3m0s

goroutine 41077 [running]:
testing.(*M).startAlarm.func1()
	/workdir/go/src/testing/testing.go:1680 +0xc8
created by time.goFunc
	/workdir/go/src/time/sleep.go:167 +0x40

goroutine 1 [chan receive, 2 minutes]:
testing.tRunner.func1(0x4000001980)
	/workdir/go/src/testing/testing.go:1139 +0x218
testing.tRunner(0x4000001980, 0x400011dd28)
	/workdir/go/src/testing/testing.go:1177 +0x10c
testing.runTests(0x400000e048, 0x408520, 0xee, 0xee, 0xbfd7a67a7982b084, 0x29e8dec58a, 0x40bbe0, 0x40000467b0)
	/workdir/go/src/testing/testing.go:1489 +0x29c
testing.(*M).Run(0x400011e000, 0x0)
	/workdir/go/src/testing/testing.go:1397 +0x1b4
net.TestMain(0x400011e000)
	/workdir/go/src/net/main_test.go:52 +0x30
main.main()
	_testmain.go:589 +0x160

goroutine 4061 [chan send, 2 minutes]:
testing.tRunner.func1.1(0x4000253ec0, 0x0, 0x0, 0x400044e600, 0x4000253e01)
	/workdir/go/src/testing/testing.go:1103 +0x50
testing.tRunner.func1(0x400044e600)
	/workdir/go/src/testing/testing.go:1164 +0x30c
testing.tRunner(0x400044e600, 0x284d88)
	/workdir/go/src/testing/testing.go:1177 +0x10c
created by testing.(*T).Run
	/workdir/go/src/testing/testing.go:1218 +0x244

goroutine 4459 [chan send, 2 minutes]:
testing.tRunner.func1.1(0x400025eec0, 0x0, 0x0, 0x4000518600, 0x400025ee01)
	/workdir/go/src/testing/testing.go:1103 +0x50
testing.tRunner.func1(0x4000518600)
	/workdir/go/src/testing/testing.go:1164 +0x30c
testing.tRunner(0x4000518600, 0x2848d0)
	/workdir/go/src/testing/testing.go:1177 +0x10c
created by testing.(*T).Run
	/workdir/go/src/testing/testing.go:1218 +0x244

goroutine 4062 [chan send, 2 minutes]:
testing.tRunner.func1.1(0x40002b5ec0, 0x0, 0x0, 0x400044e780, 0x40002b5e01)
	/workdir/go/src/testing/testing.go:1103 +0x50
testing.tRunner.func1(0x400044e780)
	/workdir/go/src/testing/testing.go:1164 +0x30c
testing.tRunner(0x400044e780, 0x284d70)
	/workdir/go/src/testing/testing.go:1177 +0x10c
created by testing.(*T).Run
	/workdir/go/src/testing/testing.go:1218 +0x244

goroutine 4506 [chan receive, 2 minutes]:
testing.runTests.func1.1(0x4000001980)
	/workdir/go/src/testing/testing.go:1496 +0x30
created by testing.runTests.func1
	/workdir/go/src/testing/testing.go:1496 +0xa8

goroutine 4058 [chan receive, 2 minutes]:
net.(*localServer).teardown(0x40002bcc60, 0x400012c678, 0x17488)
	/workdir/go/src/net/mockserver_test.go:107 +0xec
runtime.Goexit()
	/workdir/go/src/runtime/panic.go:641 +0x110
testing.(*common).FailNow(0x400044e180)
	/workdir/go/src/testing/testing.go:734 +0x38
testing.(*common).Fatalf(0x400044e180, 0x27ae25, 0x1d, 0x4000556ec8, 0x1, 0x1)
	/workdir/go/src/testing/testing.go:808 +0x78
net.testVariousDeadlines(0x400044e180)
	/workdir/go/src/net/timeout_test.go:891 +0x79c
net.TestVariousDeadlines(0x400044e180)
	/workdir/go/src/net/timeout_test.go:772 +0x34
testing.tRunner(0x400044e180, 0x2850f0)
	/workdir/go/src/testing/testing.go:1173 +0xdc
created by testing.(*T).Run
	/workdir/go/src/testing/testing.go:1218 +0x244

goroutine 6060 [IO wait, 2 minutes]:
internal/poll.runtime_pollWait(0xffff9283b3b8, 0x72, 0x3e0550)
	/workdir/go/src/runtime/netpoll.go:220 +0x44
internal/poll.(*pollDesc).wait(0x4000452598, 0x72, 0x400051c600, 0x1, 0x1)
	/workdir/go/src/internal/poll/fd_poll_runtime.go:87 +0x38
internal/poll.(*pollDesc).waitRead(...)
	/workdir/go/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Read(0x4000452580, 0x400051c67f, 0x1, 0x1, 0x0, 0x0, 0x0)
	/workdir/go/src/internal/poll/fd_unix.go:166 +0x164
net.(*netFD).Read(0x4000452580, 0x400051c67f, 0x1, 0x1, 0x40001bff28, 0x1e7a14, 0x1)
	/workdir/go/src/net/fd_posix.go:55 +0x44
net.(*conn).Read(0x40000101a0, 0x400051c67f, 0x1, 0x1, 0x0, 0x0, 0x0)
	/workdir/go/src/net/net.go:183 +0x74
net.testVariousDeadlines.func1(0x40002bcc60, 0x2c4368, 0x400000e648)
	/workdir/go/src/net/timeout_test.go:818 +0x6c
net.(*localServer).buildup.func1(0x285310, 0x40002bcc60)
	/workdir/go/src/net/mockserver_test.go:95 +0x40
created by net.(*localServer).buildup
	/workdir/go/src/net/mockserver_test.go:94 +0x44

goroutine 6078 [IO wait, 2 minutes]:
internal/poll.runtime_pollWait(0xffff9283bf18, 0x72, 0x3e0550)
	/workdir/go/src/runtime/netpoll.go:220 +0x44
internal/poll.(*pollDesc).wait(0x40001e8498, 0x72, 0x4000162000, 0x2000, 0x2000)
	/workdir/go/src/internal/poll/fd_poll_runtime.go:87 +0x38
internal/poll.(*pollDesc).waitRead(...)
	/workdir/go/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Read(0x40001e8480, 0x4000162000, 0x2000, 0x2000, 0x0, 0x0, 0x0)
	/workdir/go/src/internal/poll/fd_unix.go:166 +0x164
net.(*netFD).Read(0x40001e8480, 0x4000162000, 0x2000, 0x2000, 0x1, 0x2fc8fbec5b, 0x0)
	/workdir/go/src/net/fd_posix.go:55 +0x44
net.(*conn).Read(0x40001c0260, 0x4000162000, 0x2000, 0x2000, 0x0, 0x0, 0x0)
	/workdir/go/src/net/net.go:183 +0x74
io/ioutil.devNull.ReadFrom(0x0, 0x2c0588, 0x40001c0260, 0x23b040, 0x1, 0xffff91561620)
	/workdir/go/src/io/ioutil/ioutil.go:144 +0x84
io.copyBuffer(0x2c08c8, 0x43b430, 0x2c0588, 0x40001c0260, 0x0, 0x0, 0x0, 0x0, 0x2c0548, 0x40001e4a00)
	/workdir/go/src/io/io.go:397 +0x288
io.Copy(...)
	/workdir/go/src/io/io.go:370
net.testVariousDeadlines.func2(0x2c7470, 0x40001c0260, 0x400001dea0, 0x400044e180, 0x4000413320)
	/workdir/go/src/net/timeout_test.go:877 +0x114
created by net.testVariousDeadlines
	/workdir/go/src/net/timeout_test.go:872 +0x508
FAIL	net	180.023s

See previously #37883, #19519 (TestVariousDeadlines); #41326 (net deadlock on linux-arm64-aws).

CC @cagedmantis (linux-arm64-aws), @mikioh @bradfitz @ianlancetaylor (net)

@bcmills bcmills added the NeedsInvestigation Someone must examine and confirm this is a valid issue and not a duplicate of an existing one. label Oct 8, 2020
@bcmills bcmills added this to the Backlog milestone Oct 8, 2020
@bcmills bcmills changed the title net: apparent deadlock in TestVariousDeadlines on linux-arm64-aws net: apparent deadlock in TestVariousDeadlines on arm64 Jan 11, 2021
@bcmills
Copy link
Contributor Author

bcmills commented Apr 27, 2021

Now that #45065 is addressed this builder runs as a TryBot, and the test has started flaking in TryBot runs:
https://storage.googleapis.com/go-build-log/39ad506d/linux-arm64-aws_5dc1efb9.log

@bcmills
Copy link
Contributor Author

bcmills commented May 7, 2021

@bcmills
Copy link
Contributor Author

bcmills commented May 24, 2021

@bcmills
Copy link
Contributor Author

bcmills commented Jun 8, 2021

@bcmills
Copy link
Contributor Author

bcmills commented Jul 13, 2021

@bcmills
Copy link
Contributor Author

bcmills commented Sep 28, 2021

@gopherbot
Copy link
Contributor

Change https://golang.org/cl/371994 mentions this issue: net: eliminate arbitrary timeout in TestVariousDeadlines

@bcmills bcmills added the WaitingForInfo Issue is not actionable because of missing required information, which needs to be provided. label Dec 15, 2021
gopherbot pushed a commit that referenced this issue Dec 15, 2021
When we set a timeout, we don't actually have a guarantee one how long
the OS will take to notice it. Moreover, if the test deadlocks
completely (for example, due to a deadline never taking effect), it
would be more useful to get a full goroutine dump instead of the current
"client stuck in Dial+Copy" failure message.

For #37883
For #41863

Change-Id: I9f712ef1c620f97a5ab69baac45deb71134b99bc
Reviewed-on: https://go-review.googlesource.com/c/go/+/371994
Trust: Bryan Mills <[email protected]>
Run-TryBot: Bryan Mills <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
@gopherbot
Copy link
Contributor

Timed out in state WaitingForInfo. Closing.

(I am just a bot, though. Please speak up if this is a mistake or you have the requested information.)

@bcmills
Copy link
Contributor Author

bcmills commented Jan 15, 2022

This was likely a symptom of #45211.

@golang golang locked and limited conversation to collaborators Jan 15, 2023
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
arch-arm64 FrozenDueToAge NeedsInvestigation Someone must examine and confirm this is a valid issue and not a duplicate of an existing one. WaitingForInfo Issue is not actionable because of missing required information, which needs to be provided.
Projects
None yet
Development

No branches or pull requests

2 participants