forked from NixOS/nixpkgs
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
We noticed that nscd does cache things and sometimes causes weird bugs. Setting the TTL to 0 does not help in (some? all?) cases, as the test was able to demonstrate even on the original commit where the TTL was lowered to 0. This change ships a patch that completely short-circuits nscd's cache functionality, and a test that shows that calling `getaddrinfo` twice in quick succession results in a proper resolver query each time. Could be used as a temporary fix for NixOS#135888 and NixOS#55276
- Loading branch information
Showing
5 changed files
with
148 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,40 @@ | ||
# We basically use nscd as a proxy for forwarding nss requests to appropriate | ||
# nss modules, as we run nscd with LD_LIBRARY_PATH set to the directory | ||
# containing all such modules | ||
# Note that we can not use `enable-cache no` As this will actually cause nscd | ||
# Note that we can not use `enable-cache no` as this will actually cause nscd | ||
# to just reject the nss requests it receives, which then causes glibc to | ||
# fall back to trying to handle the request by itself, which won't work as glibc | ||
# is not aware of the path in which the nss modules live. As a workaround, we | ||
# have `enable-cache yes` with an explicit ttl of 0 | ||
# is not aware of the path in which the nss modules live. | ||
# | ||
# We originally tried to set the TTLs to 0, but that proved unreliable, so we | ||
# added a patch to nscd that short-circuits the cache functionality. | ||
# | ||
# Disabling shared access is a minor optimization that keeps clients from | ||
# looking at the (empty) cache. | ||
# | ||
server-user nscd | ||
|
||
enable-cache passwd yes | ||
positive-time-to-live passwd 0 | ||
negative-time-to-live passwd 0 | ||
shared passwd yes | ||
shared passwd no | ||
|
||
enable-cache group yes | ||
positive-time-to-live group 0 | ||
negative-time-to-live group 0 | ||
shared group yes | ||
shared group no | ||
|
||
enable-cache netgroup yes | ||
positive-time-to-live netgroup 0 | ||
negative-time-to-live netgroup 0 | ||
shared netgroup yes | ||
shared netgroup no | ||
|
||
enable-cache hosts yes | ||
positive-time-to-live hosts 0 | ||
negative-time-to-live hosts 0 | ||
shared hosts yes | ||
shared hosts no | ||
|
||
enable-cache services yes | ||
positive-time-to-live services 0 | ||
negative-time-to-live services 0 | ||
shared services yes | ||
shared services no |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import ./make-test-python.nix { | ||
name = "nscd"; | ||
|
||
machine = { pkgs, ... }: { | ||
|
||
environment.systemPackages = [ | ||
pkgs.python3Full | ||
]; | ||
services.dnsmasq = { | ||
enable = true; | ||
extraConfig = '' | ||
log-queries | ||
log-facility=- | ||
address=/example.com/127.0.0.1 | ||
''; | ||
}; | ||
networking.nameservers = [ "127.0.0.1" ]; | ||
}; | ||
|
||
testScript = '' | ||
machine.wait_for_unit("multi-user.target") | ||
print( | ||
machine.execute( | ||
"python -c 'import socket; socket.getaddrinfo(\"example.com\", 0, socket.AF_INET)'" | ||
)[1] | ||
) | ||
print("checking query log (1/2)") | ||
query_count = machine.execute("journalctl -u dnsmasq.service | grep -c example.com")[1] | ||
assert int(query_count) == 2 | ||
print( | ||
machine.execute( | ||
"python -c 'import socket; socket.getaddrinfo(\"example.com\", 0, socket.AF_INET)'" | ||
)[1] | ||
) | ||
print("OK") | ||
print("checking query log (2/2)") | ||
query_count = machine.execute("journalctl -u dnsmasq.service | grep -c example.com")[1] | ||
assert int(query_count) == 4 | ||
print("OK") | ||
''; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
diff --git a/nscd/cache.c b/nscd/cache.c | ||
index 85090a1641..5ffd578d48 100644 | ||
--- a/nscd/cache.c | ||
+++ b/nscd/cache.c | ||
@@ -153,83 +153,9 @@ cache_add (int type, const void *key, size_t len, struct datahead *packet, | ||
first ? _(" (first)") : ""); | ||
} | ||
|
||
- unsigned long int hash = __nss_hash (key, len) % table->head->module; | ||
- struct hashentry *newp; | ||
- | ||
- newp = mempool_alloc (table, sizeof (struct hashentry), 0); | ||
- /* If we cannot allocate memory, just do not do anything. */ | ||
- if (newp == NULL) | ||
- { | ||
- /* If necessary mark the entry as unusable so that lookups will | ||
- not use it. */ | ||
- if (first) | ||
+ // Completely bypass adding things to the cache. | ||
packet->usable = false; | ||
- | ||
- return -1; | ||
- } | ||
- | ||
- newp->type = type; | ||
- newp->first = first; | ||
- newp->len = len; | ||
- newp->key = (char *) key - table->data; | ||
- assert (newp->key + newp->len <= table->head->first_free); | ||
- newp->owner = owner; | ||
- newp->packet = (char *) packet - table->data; | ||
- assert ((newp->packet & BLOCK_ALIGN_M1) == 0); | ||
- | ||
- /* Put the new entry in the first position. */ | ||
- /* TODO Review concurrency. Use atomic_exchange_release. */ | ||
- newp->next = atomic_load_relaxed (&table->head->array[hash]); | ||
- while (!atomic_compare_exchange_weak_release (&table->head->array[hash], | ||
- (ref_t *) &newp->next, | ||
- (ref_t) ((char *) newp | ||
- - table->data))); | ||
- | ||
- /* Update the statistics. */ | ||
- if (packet->notfound) | ||
- ++table->head->negmiss; | ||
- else if (first) | ||
- ++table->head->posmiss; | ||
- | ||
- /* We depend on this value being correct and at least as high as the | ||
- real number of entries. */ | ||
- atomic_increment (&table->head->nentries); | ||
- | ||
- /* It does not matter that we are not loading the just increment | ||
- value, this is just for statistics. */ | ||
- unsigned long int nentries = table->head->nentries; | ||
- if (nentries > table->head->maxnentries) | ||
- table->head->maxnentries = nentries; | ||
- | ||
- if (table->persistent) | ||
- // XXX async OK? | ||
- msync ((void *) table->head, | ||
- (char *) &table->head->array[hash] - (char *) table->head | ||
- + sizeof (ref_t), MS_ASYNC); | ||
- | ||
- /* We do not have to worry about the pruning thread if we are | ||
- re-adding the data since this is done by the pruning thread. We | ||
- also do not have to do anything in case this is not the first | ||
- time the data is entered since different data heads all have the | ||
- same timeout. */ | ||
- if (first && prune_wakeup) | ||
- { | ||
- /* Perhaps the prune thread for the table is not running in a long | ||
- time. Wake it if necessary. */ | ||
- pthread_mutex_lock (&table->prune_lock); | ||
- time_t next_wakeup = table->wakeup_time; | ||
- bool do_wakeup = false; | ||
- if (next_wakeup > packet->timeout + CACHE_PRUNE_INTERVAL) | ||
- { | ||
- table->wakeup_time = packet->timeout; | ||
- do_wakeup = true; | ||
- } | ||
- pthread_mutex_unlock (&table->prune_lock); | ||
- if (do_wakeup) | ||
- pthread_cond_signal (&table->prune_cond); | ||
- } | ||
- | ||
- return 0; | ||
+ return -1; | ||
} | ||
|
||
/* Walk through the table and remove all entries which lifetime ended. |