High CPU usage for updating notification timestamps #1102

mustaqimM · 2022-08-20T12:16:43Z

Issue description

With idle_threshold set, old notifications (ones that say "X min old") increase the CPU usage:

I believe it's related to #169

Installation info

Version: 1.9.0 (2022-06-27)
Install type: system package
Window manager / Desktop environment: swaywm

Minimal dunstrc

# -*- eval: (rainbow-mode); -*-
# See dunst(5) for all configuration options

[global]
    ### Display ###

    # Which monitor should the notifications be displayed on.
    monitor = 0

    # Display notification on focused monitor.  Possible modes are:
    #   mouse: follow mouse pointer
    #   keyboard: follow window with keyboard focus
    #   none: don't follow anything
    #
    # "keyboard" needs a window manager that exports the
    # _NET_ACTIVE_WINDOW property.
    # This should be the case for almost all modern window managers.
    #
    # If this option is set to mouse or keyboard, the monitor option
    # will be ignored.
    follow = mouse

    ### Geometry ###

    # dynamic width from 0 to 300
    # width = (0, 300)
    # constant width of 300
    width = 250

    # The maximum height of a single notification, excluding the frame.
    height = 300

    # Position the notification in the top right corner
    origin = top-right

    # Offset from the origin
    offset = 10x40

    # Scale factor. It is auto-detected if value is 0.
    scale = 0

    # Maximum number of notification (0 means no limit)
    notification_limit = 0

    ### Progress bar ###

    # Turn on the progess bar. It appears when a progress hint is passed with
    # for example dunstify -h int:value:12
    progress_bar = true

    # Set the progress bar height. This includes the frame, so make sure
    # it's at least twice as big as the frame width.
    progress_bar_height = 10

    # Set the frame width of the progress bar
    progress_bar_frame_width = 1

    # Set the minimum width for the progress bar
    progress_bar_min_width = 150

    # Set the maximum width for the progress bar
    progress_bar_max_width = 300


    # Show how many messages are currently hidden (because of
    # notification_limit).
    indicate_hidden = yes

    # The transparency of the window.  Range: [0; 100].
    # This option will only work if a compositing window manager is
    # present (e.g. xcompmgr, compiz, etc.). (X11 only)
    transparency = 0

    # Draw a line of "separator_height" pixel height between two
    # notifications.
    # Set to 0 to disable.
    separator_height = 1

    # Padding between text and separator.
    padding = 8

    # Horizontal padding.
    horizontal_padding = 8

    # Padding between text and icon.
    text_icon_padding = 10

    # Defines width in pixels of frame around the notification window.
    # Set to 0 to disable.
    frame_width = 1

    # Defines color of the frame around the notification window.
    frame_color = "#e0e0e0E6"

    # Define a color for the separator.
    # possible values are:
    #  * auto: dunst tries to find a color fitting to the background;
    #  * foreground: use the same color as the foreground;
    #  * frame: use the same color as the frame;
    #  * anything else will be interpreted as a X color.
    separator_color = frame

    # Sort messages by urgency.
    sort = yes

    # Don't remove messages, if the user is idle (no mouse or keyboard input)
    # for longer than idle_threshold seconds.
    # Set to 0 to disable.
    # A client can set the 'transient' hint to bypass this. See the rules
    # section for how to disable this if necessary
    idle_threshold = 120

    ### Text ###

    font = SF Pro Text 11

    # The spacing between lines.  If the height is smaller than the
    # font height, it will get raised to the font height.
    line_height = 0

    # Possible values are:
    # full: Allow a small subset of html markup in notifications:
    #        <b>bold</b>
    #        <i>italic</i>
    #        <s>strikethrough</s>
    #        <u>underline</u>
    #
    #        For a complete reference see
    #        <https://docs.gtk.org/Pango/pango_markup.html>.
    #
    # strip: This setting is provided for compatibility with some broken
    #        clients that send markup even though it's not enabled on the
    #        server. Dunst will try to strip the markup but the parsing is
    #        simplistic so using this option outside of matching rules for
    #        specific applications *IS GREATLY DISCOURAGED*.
    #
    # no:    Disable markup parsing, incoming notifications will be treated as
    #        plain text. Dunst will not advertise that it has the body-markup
    #        capability if this is set as a global setting.
    #
    # It's important to note that markup inside the format option will be parsed
    # regardless of what this is set to.
    markup = full

    # The format of the message.  Possible variables are:
    #   %a  appname
    #   %s  summary
    #   %b  body
    #   %i  iconname (including its path)
    #   %I  iconname (without its path)
    #   %p  progress value if set ([  0%] to [100%]) or nothing
    #   %n  progress value if set without any extra characters
    #   %%  Literal %
    # Markup is allowed
    format = "<b>%s</b>\n%b"

    # Alignment of message text.
    # Possible values are "left", "center" and "right".
    alignment = left

    # Vertical alignment of message text and icon.
    # Possible values are "top", "center" and "bottom".
    vertical_alignment = center

    # Show age of message if message is older than show_age_threshold
    # seconds.
    # Set to -1 to disable.
    show_age_threshold = 60

    # Specify where to make an ellipsis in long lines.
    # Possible values are "start", "middle" and "end".
    ellipsize = middle

    # Ignore newlines '\n' in notifications.
    ignore_newline = no

    # Stack together notifications with the same content
    stack_duplicates = true

    # Hide the count of stacked notifications with the same content
    hide_duplicate_count = false

    # Display indicators for URLs (U) and actions (A).
    show_indicators = yes

    ### Icons ###

    # Align icons left/right/top/off
    icon_position = left

    # Scale small icons up to this size, set to 0 to disable. Helpful
    # for e.g. small files or high-dpi screens. In case of conflict,
    # max_icon_size takes precedence over this.
    min_icon_size = 0

    # Scale larger icons down to this size, set to 0 to disable
    max_icon_size = 32

    # Paths to default icons.
    icon_path = /usr/share/icons/Papirus-Dark/24x24/actions/:/usr/share/icons/Papirus-Dark/24x24/status/:/usr/share/icons/Papirus-Dark/24x24/devices/:/usr/share/icons/Papirus-Dark/24x24/apps/

    ### History ###

    # Should a notification popped up from history be sticky or timeout
    # as if it would normally do.
    sticky_history = yes

    # Maximum amount of notifications kept in history
    history_length = 20

    ### Misc/Advanced ###

    # dmenu path.
    dmenu = /usr/bin/dmenu -p dunst:

    # Browser for opening urls in context menu.
    browser = /usr/bin/xdg-open

    # Always run rule-defined scripts, even if the notification is suppressed
    always_run_script = true

    # Define the title of the windows spawned by dunst
    title = Dunst

    # Define the class of the windows spawned by dunst
    class = Dunst

    # Define the corner radius of the notification window
    # in pixel size. If the radius is 0, you have no rounded
    # corners.
    # The radius will be automatically lowered if it exceeds half of the
    # notification height to avoid clipping text and/or icons.
    corner_radius = 14

    # Ignore the dbus closeNotification message.
    # Useful to enforce the timeout set by dunst configuration. Without this
    # parameter, an application may close the notification sent before the
    # user defined timeout.
    ignore_dbusclose = false

    ### Wayland ###
    # These settings are Wayland-specific. They have no effect when using X11

    # Uncomment this if you want to let notications appear under fullscreen
    # applications (default: overlay)
    # layer = top

    # Set this to true to use X11 output on Wayland.
    force_xwayland = false

    ### Legacy

    # Use the Xinerama extension instead of RandR for multi-monitor support.
    # This setting is provided for compatibility with older nVidia drivers that
    # do not support RandR and using it on systems that support RandR is highly
    # discouraged.
    #
    # By enabling this setting dunst will not be able to detect when a monitor
    # is connected or disconnected which might break follow mode if the screen
    # layout changes.
    force_xinerama = false

    ### mouse

    # Defines list of actions for each mouse event
    # Possible values are:
    # * none: Don't do anything.
    # * do_action: Invoke the action determined by the action_name rule. If there is no
    #              such action, open the context menu.
    # * open_url: If the notification has exactly one url, open it. If there are multiple
    #             ones, open the context menu.
    # * close_current: Close current notification.
    # * close_all: Close all notifications.
    # * context: Open context menu for the notification.
    # * context_all: Open context menu for all notifications.
    # These values can be strung together for each mouse event, and
    # will be executed in sequence.
    mouse_left_click = open_url,close_current
    mouse_middle_click = do_action, close_current
    mouse_right_click = close_all

# Experimental features that may or may not work correctly. Do not expect them
# to have a consistent behaviour across releases.
[experimental]
    # Calculate the dpi to use on a per-monitor basis.
    # If this setting is enabled the Xft.dpi value will be ignored and instead
    # dunst will attempt to calculate an appropriate dpi value for each monitor
    # using the resolution and physical size. This might be useful in setups
    # where there are multiple screens with very different dpi values.
    per_monitor_dpi = false


[urgency_low]
    # IMPORTANT: colors have to be defined in quotation marks.
    # Otherwise the "#" and following would be interpreted as a comment.
    background = "#f7f7f8"
    foreground = "#1d1f21"
    timeout = 7
    # Icon for notifications with low urgency, uncomment to enable
    #default_icon = /path/to/icon

[urgency_normal]
    background = "#f7f7f8"
    foreground = "#1d1f21"
    timeout = 10
    # Icon for notifications with normal urgency, uncomment to enable
    #default_icon = /path/to/icon

[urgency_critical]
    background = "#cc6666"
    foreground = "#ffffff"
    frame_color = "#c82829"
    timeout = 15
    # Icon for notifications with critical urgency, uncomment to enable
    #default_icon = /path/to/icon

[play_sound]
    summary = "*"
    script = /home/mustaqim/.config/dunst/alert.sh

# Every section that isn't one of the above is interpreted as a rules to
# override settings for certain messages.
#
# Messages can be matched by
#    appname (discouraged, see desktop_entry)
#    body
#    category
#    desktop_entry
#    icon
#    match_transient
#    msg_urgency
#    stack_tag
#    summary
#
# and you can override the
#    background
#    foreground
#    format
#    frame_color
#    fullscreen
#    new_icon
#    set_stack_tag
#    set_transient
#    set_category
#    timeout
#    urgency
#    icon_position
#    skip_display
#    history_ignore
#    action_name
#    word_wrap
#    ellipsize
#    alignment
#    hide_text
#
# Shell-like globbing will get expanded.
#
# Instead of the appname filter, it's recommended to use the desktop_entry filter.
# GLib based applications export their desktop-entry name. In comparison to the appname,
# the desktop-entry won't get localized.
#
# SCRIPTING
# You can specify a script that gets run when the rule matches by
# setting the "script" option.
# The script will be called as follows:
#   script appname summary body icon urgency
# where urgency can be "LOW", "NORMAL" or "CRITICAL".
#
# NOTE: It might be helpful to run dunst -print in a terminal in order
# to find fitting options for rules.

# Disable the transient hint so that idle_threshold cannot be bypassed from the
# client
#[transient_disable]
#    match_transient = yes
#    set_transient = no
#
# Make the handling of transient notifications more strict by making them not
# be placed in history.
#[transient_history_ignore]
#    match_transient = yes
#    history_ignore = yes

# fullscreen values
# show: show the notifications, regardless if there is a fullscreen window opened
# delay: displays the new notification, if there is no fullscreen window active
#        If the notification is already drawn, it won't get undrawn.
# pushback: same as delay, but when switching into fullscreen, the notification will get
#           withdrawn from screen again and will get delayed like a new notification
#[fullscreen_delay_everything]
#    fullscreen = delay
#[fullscreen_show_critical]
#    msg_urgency = critical
#    fullscreen = show

#[espeak]
#    summary = "*"
#    script = dunst_espeak.sh

#[script-test]
#    summary = "*script*"
#    script = dunst_test.sh

#[ignore]
#    # This notification will not be displayed
#    summary = "foobar"
#    skip_display = true

#[history-ignore]
#    # This notification will not be saved in history
#    summary = "foobar"
#    history_ignore = yes

#[skip-display]
#    # This notification will not be displayed, but will be included in the history
#    summary = "foobar"
#    skip_display = yes

#[signed_on]
#    appname = Pidgin
#    summary = "*signed on*"
#    urgency = low
#
#[signed_off]
#    appname = Pidgin
#    summary = *signed off*
#    urgency = low
#
#[says]
#    appname = Pidgin
#    summary = *says*
#    urgency = critical
#
#[twitter]
#    appname = Pidgin
#    summary = *twitter.com*
#    urgency = normal
#
#[stack-volumes]
#    appname = "some_volume_notifiers"
#    set_stack_tag = "volume"
#
# vim: ft=cfg

The text was updated successfully, but these errors were encountered:

fwsmit · 2022-08-20T13:01:06Z

What do the logs say? I believe it should be spamming some messages, because it's in a loop.

Also, the dunstrc you posted is not minimal. Try reproducing it with only minimal setting changes

mustaqimM · 2022-08-20T15:08:45Z

@fwsmit I've since tried it without my dunstrc:

Also how do I log? I tried dunst -verbosity info but that only shows interactions with the notifications; when bringing the old notifications back with dunstctl history-pop, nothing gets printed.

fwsmit · 2022-08-20T17:34:32Z

Try verbosity debug

mustaqimM · 2022-08-21T04:58:26Z

@fwsmit I've attached a debug.log

debug.log

jdrowell · 2023-02-02T19:27:19Z

I've got the same problem (high CPU usage when using show_age_threshold > 0). It seems that draw() just gets increasingly spammed as the number of pending notifications increase. For now I'm forcing the sleep interval to be 1s (1000000) here:

dunst/src/dunst.c

Line 95 in 4910d5e

gint64 sleep = queues_get_next_datachange(now);

The source of the problem seems to be the fancy sleep calculations here:

dunst/src/queues.c

Lines 571 to 579 in 4910d5e

    
           if (settings.show_age_threshold >= 0) { 
        
                   gint64 age = time - n->timestamp; 
        
                   // sleep exactly until the next shift of the second happens 
        
                   if (age > settings.show_age_threshold - S2US(1)) 
        
                           sleep = MIN(sleep, (S2US(1) - (age % S2US(1)))); 
        
                   else 
        
                           sleep = MIN(sleep, settings.show_age_threshold - age); 
        
           }

With the change mentioned above, the draw loop seems to get called at most once a second, which works for me. The downside is that it seems to fire even when no redrawing is needed, but since a redraw is only for the notifications on the screen, this uses very little CPU (shows 0% on my PC).

fwsmit · 2023-02-02T20:09:44Z

Is it always reproducible?

jdrowell · 2023-02-02T23:11:27Z

It is. I started noticing this problem when there are over about 100 notifications waiting to be read, and it always results in very high CPU usage. I've been running today with the "fix" (not a fix) above, and it helps a lot, got at most 5% CPU usage, but that is still not great, on the debug logs we can see that it starts to run draw() more than once a second (thus the 5% instead of 0% CPU use). I didn't look into how these calls are being reach, it it some event loop or are there threads being spawn? I'm guessing the later because I haven't seen CPU use over 100%.

About reproducing it, just fire up dunst with show_age_threshold = 60 in the config, send a couple hundred notifications (like with notify-send), and watch the debug logs, it flows very fast once the age starts getting rendered (after the initial 60s).

tobast · 2023-02-08T10:21:26Z

I have also been noticing this issue for some time now, but never cared to dig into it. I often leave my computer locked but unsuspended when I need eg. to access it remotely, and the fans are close to always at full speed and CPU at 100% when I come back and have some notifications pending. In my case it doesn't require hundreds of notifications; the email and text messages received overnight (or even over lunch) are usually enough to trigger the issue.

I can further investigate if needed.

Edit: Actually, idle_threshold does not seem to be a "root cause": the problem arises whenever dunst has too many notifications to show at once. Having idle_threshold > 0 helps, because then, notifications can stack up while the computer is idle; but I can reproduce a very high CPU usage while moving the mouse around with

for i in $(seq 1 200); do notify-send "Test $i" "Test :)"; done

Edit 2: @jdrowell is correct, the problem indeed comes from the sleep timeout in queues_get_next_datachange. If we assume that about 100 notifications are created, and their time of creation in microseconds modulo one second is uniformly spread, then Dunst will be woken up about each 10ms, if show_age_threshold >= 0 and the notifications are old enough.

I expect it would be fine enough to wake up only once a second to update the ages of all the notifications?

Fixes dunst-project#1102

biopsin · 2023-03-16T18:03:45Z

I still get 40-50% load upon disabling show_age_threshold = -1 but 0% for anything from 0
dunst-1.9.1 ,system pkg

Edit: reverting to 1.9.0 for fixing disabling show_age_threshold = -1

tobast · 2023-03-28T09:36:38Z

I can confirm that I still get unexpected system load. The reason is not clear to me to this point. I'll add some targeted debug logging on my local build and leave it running until I encounter the bug again.

stephan49 · 2023-03-30T02:14:18Z

Version 1.9.1 has actually started to cause high CPU usage for me when receiving notifications at idle (idle_threshold = 30). Never had a problem with 1.9.0 or older versions.

Notifications start flickering rapidly and my compositor (picom) also increases in CPU usage when this issue happens.

My show age of message setting is show_age_threshold = 60.

tobast · 2023-03-31T09:05:03Z

Can this issue be re-opened? There clearly still is a bug. I can reproduce.

I believe that the problem, again, is that after some time, the timer will somehow trigger every ms. I'm investigating the cause.

gvolpe · 2023-03-31T09:05:05Z

Experiencing the same issue when upgrading to v1.9.1: idle notifications get my CPU burning 🔥. I have reverted to v1.9.0 in the meantime, but I'll try to find out why that's the case.

fwsmit · 2023-03-31T10:10:07Z

Can this issue be re-opened? There clearly still is a bug. I can reproduce.

I believe that the problem, again, is that after some time, the timer will somehow trigger every ms. I'm investigating the cause.

Reopened it

tobast · 2023-03-31T10:13:37Z

I found the issue, but I'll need guidance (@fwsmit ?) as to how to best handle this.

In these lines of get_next_datachange, the next datachange is set to the next coming notification expiracy. Yet, if dunst is idle (idle_threshold exceeded) or is actively retiring notifications (eg. notification_limit is 10, but there are 20 notifications, so the 10 last are expired but will still show for some time because the first 10 just expired), this notification expiracy is not the correct deadline.

While writing this, I assumed that n->is_locked would be true in these cases, but it seems not to be the case.

Where is the best place to handle this?

fwsmit · 2023-03-31T10:34:27Z

Hmm, it seems there is no property that updates when a notification not displayed. However, when it's not displayed it's in the waiting queue instead of the displayed queue (see queues_update). So when you go through the displayed list you should be looking at only displayed notifications.
I would start writing a test case that reproduces this and then you'll learn what's needed exactly for reproducing.

so the 10 last are expired but will still show for some time because the first 10 just expired), this notification expiracy is not the correct deadline.

Where is this implemented?

danniel3 · 2023-04-03T12:20:15Z

I've got this problem too. Often come back to my screenlocked laptop after an hour to find the fans blasting, my regular 2 apps open, sometimes even 0 notifications, and yet dunst is using 30%+ of my CPU

fwsmit · 2023-04-03T12:45:42Z

So it seems the problem has gotten worse since 1.9.1. I currently do not have the time to investigate, but it would be good to have a debug log of dunst at the time of the bug happening.
Do you have any reproduction steps?

tobast · 2023-04-03T13:19:13Z

I do not have the real log, but some custom logging gave me this:

[190258316082] Computed wake-up time: 190268315716 in 9999634 μs
[190258318350] Sleeping for 9997 ms
CRITICAL: Source ID 233 was not found when attempting to remove it
[190268326197] RUN, reason 0
[190268334554] Sleeping for 1 ms
[190268336580] RUN, reason 0
[190268342712] Sleeping for 1 ms
[190268344551] RUN, reason 0
[190268348758] Sleeping for 1 ms
[190268350297] RUN, reason 0
[190268354390] Sleeping for 1 ms
[190268356065] RUN, reason 0
[190268362222] Sleeping for 1 ms
[190268363875] RUN, reason 0
[190268369140] Sleeping for 1 ms
[190268370818] RUN, reason 0
[190268374768] Sleeping for 1 ms
[190268376397] RUN, reason 0
[190268379643] Sleeping for 1 ms
[190268381129] RUN, reason 0
[190268386744] Sleeping for 1 ms
[190268388508] RUN, reason 0
[190268392208] Sleeping for 1 ms
[190268393735] RUN, reason 0
[190268399536] Sleeping for 1 ms
[190268401152] RUN, reason 0
[190268405195] Sleeping for 1 ms
[190268406809] RUN, reason 0
[190268411465] Sleeping for 1 ms
[190268413039] RUN, reason 0
[190268416753] Sleeping for 1 ms
[snip -- and so on]

Some more investigation pointed that the super-short sleep happens because of the lines I pointed to earlier.

Where is this implemented?

I actually just guessed from the behaviour. I'll try to find some time to fix this asap; so far I do not understand why it got worst -- from a very quick scan, it looks to me that the old code should have this problem also.

fwsmit · 2023-04-03T15:36:35Z

I spotted one thing that's different:

gint64 timeout_ts = n->timestamp + n->timeout;

new

gint64 ttl = n->timeout - (time - n->start);

old

Now we are using n->timestamp instead of n->start. I'm not sure what the difference is, but it might be important

bebehei · 2023-04-03T23:58:59Z

Oh, well. I think I digged into a rabbitloophole. After reading this bug and the PR.

What is the initial problem?

I cannot reproduce it with anything before #1140, but if I introduce the changes from #1140 into my tree, this gives 100% reproducibility. While being idle, after That after the timeout of the notification is hit, dunst goes brrrr.

What was the initial reason, why dunst has got problems?

Is it just, that if you have too many notifications displayed? If so.

Yes. the old queues.c implementation was too accurate for this. We just need to add a lower limit there. (I just do not remember anymore, why this was a feature)
Revert the patch from Floor timestamp to the second when displaying age #1140
- This has flaws, since it did not fully invert the logic of queues_get_next_datachange(). (e.g. return value of -1)

jdrowell · 2023-04-04T01:56:36Z

The hack I used 2 months ago (#1102 (comment)) for this is still mitigating the issue for me, don't have any load problems anymore. But it is a hack, so looking forward to someone properly fixing this. Also, after a while (5min?) the 1 second precision is really not necessary, so maybe only update the notifications that are actually on screen, and update less frequently as they get older.

tobast · 2023-04-04T08:00:16Z

It could be the cause of this issue then. It's probably worth to try using n->start and see if that lessens the issue. If that is true that would be a temporary fix before the more permanent fix

Submitted as PR #1158.

It seems to work indeed, though I tested only hastily ; all tests are passing, though I had to fix one (see PR), which was a problem in the test and not a problem in the code.

The one thing I missed all this time that makes it relevant is that queues_get_next_datachange iterates over notifications that are displayed only, not all notifications.

fwsmit · 2023-04-04T08:58:01Z

Thanks for posting a PR. I'll give the others a little bit of time to test it. If it doesn't fix the issue, then I agree with bebehei to revert the patch.

Oh, well. I think I digged into a rabbitloophole. After reading this bug and the PR.

What is the initial problem?

The problem is that the queues implementation used to take each notification's "turn of second" into account. With 100 notifications you would update 100 times a second to update the age text for each notification.

  * This has flaws, since it did not fully invert the logic of `queues_get_next_datachange()`. (e.g. return value of `-1`)

I think it can still return -1 when there are no notifications.

bakkeby · 2023-04-04T09:04:44Z

The change makes sense to me.

	struct notification *n = iter->data;
	gint64 timeout_ts = n->timestamp + n->timeout;

	if (n->timeout > 0 && n->locked == 0) {
		if (timeout_ts > time)
			wakeup_time = MIN(wakeup_time, timeout_ts);
		else
			// while we're processing or while locked, the notification already timed out
			return time;
	}

n->timestamp represents the time when the notification first arrived which quickly results in timeout_ts being in the past and we always end up in the else statement returning the current time. In this scenario the calling run() function will sleep the minimum of 1 millisecond.

n->start on the other hand represents the time when the notification was last updated (the start of the current display in milliseconds), which combined with n->timeout gives a better the timeout timestamp (timeout_ts).

fwsmit · 2023-04-04T09:44:37Z

The change makes sense to me.

Do you mean the change in #1158?

n->timestamp represents the time when the notification first arrived which quickly results in timeout_ts being in the past and we always end up in the else statement returning the current time. In this scenario the calling run() function will sleep the minimum of 1 millisecond.

n->start on the other hand represents the time when the notification was last updated (the start of the current display in milliseconds), which combined with n->timeout gives a better the timeout timestamp (timeout_ts).

Yes, that does seem to make sense.

bakkeby · 2023-04-04T09:51:17Z

Yes I meant the change in #1158.

fwsmit · 2023-04-05T10:05:37Z

Thanks for clarifying. It would be nice if a few people can try out #1158. I will try to release a new version fixing the issue on tuesday.

biopsin · 2023-04-07T08:42:25Z

I did try out, but I high load remain when show_age_threshold = -1
EDIT: retested after #1158 (comment), still no change.
Xorg goes to 64% and dunst to 36% 'ish

fifididi · 2023-04-10T11:19:26Z

I'm also experiencing this, version 1.9.1 🥺

High CPU noticed after coming back to my laptop after a night, only one notification was enough to trigger it, though the notification stayed alive for hours.

bakkeby · 2023-04-11T21:44:17Z

As per the comment in the dunst config file the show_age_threshold can be set to -1 to disable the display of the age of a message.

In queues_get_next_datachange if there are no messages that are timing out (i.e. they are permanent until the user interacts with them) then the function will return -1.

dunst/src/queues.c

Line 589 in 464076d

return wakeup_time != G_MAXINT64 ? wakeup_time : -1;

Now in the run function we do not take into account that the return value can be -1 indicating that there is no need to do any further updates.

dunst/src/dunst.c

Lines 95 to 115 in 464076d

    
           if (active) { 
        
                   gint64 timeout_at = queues_get_next_datachange(now); 
        
                   // Previous computations may have taken time, update `now` 
        
                   // This might mean that `timeout_at` is now before `now`, so 
        
                   // we have to make sure that `sleep` is still positive. 
        
                   now = time_monotonic_now(); 
        
                   gint64 sleep = timeout_at - now; 
        
                   sleep = MAX(sleep, 1000); // Sleep at least 1ms 
        
                   LOG_D("Sleeping for %li ms", sleep/1000); 
        
                   if (sleep >= 0) { 
        
                           if (reason == 0 || next_timeout < now || timeout_at < next_timeout) { 
        
                                   if (next_timeout != 0) { 
        
                                           g_source_remove(next_timeout_id); 
        
                                   } 
        
                                   next_timeout_id = g_timeout_add(sleep/1000, run, NULL); 
        
                                   next_timeout = timeout_at; 
        
                           } 
        
                   } 
        
           }

The solution can be as simple as bailing if that is the case:

         if (active) {
                 gint64 timeout_at = queues_get_next_datachange(now);
+                if (timeout_at == -1)
+                        return G_SOURCE_REMOVE;
                 // Previous computations may have taken time, update `now`
                 // This might mean that `timeout_at` is now before `now`, so
                 // we have to make sure that `sleep` is still positive.
                 now = time_monotonic_now();
                 gint64 sleep = timeout_at - now;
                 sleep = MAX(sleep, 1000); // Sleep at least 1ms
 
                 LOG_D("Sleeping for %li ms", sleep/1000);
 
                 if (sleep >= 0) {
                         if (reason == 0 || next_timeout < now || timeout_at < next_timeout) {
                                 if (next_timeout != 0) {
                                         g_source_remove(next_timeout_id);
                                 }
                                 next_timeout_id = g_timeout_add(sleep/1000, run, NULL);
                                 next_timeout = timeout_at;
                         }
                 }
         }

(there are other ways to refactor the code to the same effect of course)

Example test scenario is to:

set show_age_threshold to -1 in the dunstrc configuration file and
run top -p $(pidof dunst) to monitor CPU usage and
run notify-send -u critical hello

Expected behaviour without the change above:

CPU usage increases drastically while the notification is displayed
the notification may flicker

Expected behaviour with the change above:

CPU usage remains at 0.0% and
there are no unwanted side effects (i.e. new notifications come and go as normal, one can interact with the critical notification as normal)

fwsmit · 2023-04-12T11:27:07Z

@bakkeby thank you for taking a look. Your fix seems to work indeed. But I believe the conversation is being muddied up by discussing 2 different issues here. There is the issue of high CPU usage when show_age_threshold > 0 and when show_age_threshold = -1. The last one should be discussed in a separate issue which I have made now: #1163

fwsmit · 2023-04-12T11:30:56Z

As for this issue, I'm unable to reproduce it with show_age_threshold > 0

fwsmit · 2023-04-16T19:49:12Z

Could anyone see if #1158 fixes this issue? As I said above, I cannot reproduce the issue, so I cannot test the fix

fifididi · 2023-04-16T23:38:53Z

Could anyone see if #1158 fixes this issue? As I said above, I cannot reproduce the issue, so I cannot test the fix

I have managed to simulate (reproduce) the bug by forcing an idle state

static bool queues_notification_is_finished(struct notification *n, struct dunst_status status, gint64 time)
{
        assert(n);

        if (n->skip_display && !n->redisplayed)
                return true;

        if (n->timeout == 0) // sticky
                return false;

-        bool is_idle = status.fullscreen ? false : status.idle;
+        bool is_idle = true;

        /* don't timeout when user is idle */
        if (is_idle && !n->transient) {
                n->start = time_monotonic_now();
                return false;
        }

...
}

and setting the timestamp value to a high number to simulate time elapsed

gint64 queues_get_next_datachange(gint64 time)
{
        gint64 wakeup_time = G_MAXINT64;
        gint64 next_second = time + S2US(1) - (time % S2US(1));

        for (GList *iter = g_queue_peek_head_link(displayed); iter;
                        iter = iter->next) {
                struct notification *n = iter->data;
+               n->timestamp = 0xFFFFFFFF;

                gint64 timeout_ts = n->timestamp + n->timeout;

                if (n->timeout > 0 && n->locked == 0) {
                        if (timeout_ts > time)
                                wakeup_time = MIN(wakeup_time, timeout_ts);

With that code, a simple notification is enough to trigger CPU hogging (the compositor [picom in my case] and dunst compete for cpu cycles). Note: it's important that the specific timeout value for the notification causing CPU hogging to be non-zero. If the timeout related to the severity of the notification is zero, CPU hogging doesn't appear to happen.

The exhibiting behaviour is the same as the one described in this issue and the one I seemed to have experienced. I have tested #1158 and it appears to fix the issue.

bakkeby · 2023-04-17T09:39:52Z

I had a closer look and I believe that I have an explanation for what is going on and why this change is the right fix for this issue.

                 struct notification *n = iter->data;
-                gint64 timeout_ts = n->timestamp + n->timeout;
+                gint64 timeout_ts = n->start + n->timeout;

                 if (n->timeout > 0 && n->locked == 0) {

n->timestamp represents the time dunst received the notification
n->start represents the time dunst displayed the notification

In the dunst configuration you can specify how long a notification is to be shown before it is automatically removed.

I would suggest setting the timeout to 5 seconds to make reproducing this issue quicker (I think this is set to 60 seconds by default).

[urgency_normal]
    timeout = 5

If the user receives more than a certain number of notifications then they will be queued to be displayed later and at the bottom of the list of notifications shown there will be a piece of text showing how many more notifications are pending e.g. (31 more).

Now let's say that we create a lot of notifications using this for loop as mentioned earlier in this issue.

for i in $(seq 1 250); do notify-send "Test $i" "Test :)"; done

You can monitor dunst CPU usage using top like this:

top -p $(pidof dunst)

The CPU usage will be low. At some point the timeout for the currently displayed notifications will trigger and the notifications will be removed.

At this point the 19 (in my case) next notifications from the queue are displayed. These notifications will again be subject to a delay of 5 seconds before these are removed.

In queues_get_next_datachange we calculate the next timeout based on when the notification was received by dunst, not when the notification was first displayed.

                gint64 timeout_ts = n->timestamp + n->timeout;

Given that the notifications were received roughly at the same time and these new notifications had to wait 5 seconds before they could get their turn the timeout for this second batch of notifications have already expired.

This means that we always end up hitting the else in this block and returning the current time.

                if (n->timeout > 0 && n->locked == 0) {
                        if (timeout_ts > time)
                                wakeup_time = MIN(wakeup_time, timeout_ts);
                        else
                                // while we're processing or while locked, the notification already timed out
                                return time;
                }

In the calling run function we sleep for the minimum of one millisecond in this scenario, resulting in high CPU usage that lasts until all notifications have timed out.

By calculating the timeout based on when the notification was first displayed the notifications are removed and replaced by new ones from the queue without the elevated CPU usage.

                gint64 timeout_ts = n->start + n->timeout;

Ultimately what this boils down to is that this issue does not have anything to do with the show_age_threshold setting.

fwsmit · 2023-04-17T10:10:43Z

Thanks for investigating and making a clear explanation. This doesn't explain the original report, but it does explain the regression in 1.9.1, so it might be a separate issue from the original report again 😆
I'd suggest fixing the n->timestamp and n->start mixup and release a new version.

bakkeby · 2023-04-17T10:26:32Z

@fwsmit the original report is in the context of the idle_threshold as far as I can see.

To reproduce set normal timeout to 5 seconds.

[urgency_normal]
    timeout = 5

Set idle threshold to 5 seconds so that you don't have to wait for too long.

[global]
    idle_threshold = 5

Then start notifications with a 2 second pause between each notification.

for i in $(seq 1 19); do notify-send "Test $i" "Test :)"; sleep 2; done

Once the 5 second threshold is passed for the first notification it will run into the same scenario of calculating an already expired timeout, but the notification will not be removed because the user is idle. The CPU usage will rise at this point.

The same fix addresses this issue as well because the n->start will continuously shift as long as the user is idle, as seen here:

/**
 * Check if a notification has timed out
 *
 * @param n the notification to check
 * @param status the current status of dunst
 * @param time the current time
 * @retval true: the notification is timed out
 * @retval false: otherwise
 */
static bool queues_notification_is_finished(struct notification *n, struct dunst_status status, gint64 time)
{
...
        /* don't timeout when user is idle */
        if (is_idle && !n->transient) {
                n->start = time_monotonic_now();
                return false;
        }
...

bakkeby · 2023-04-17T10:47:13Z

I think there is an argument for changing the n->timestamp to n->start in the else here as well.

                if (settings.show_age_threshold >= 0) {
                        gint64 age = time - n->timestamp;

                        if (age > settings.show_age_threshold - S2US(1)) {
                                /* Notification age should be updated -- sleep
                                 * until the next turn of second.
                                 * This ensures that all notifications' ages
                                 * will change at once, and that at most one
                                 * update will occur each second for this
                                 * purpose. */
                                wakeup_time = MIN(wakeup_time, next_second);
                        }
                        else
                                wakeup_time = MIN(wakeup_time, n->timestamp + settings.show_age_threshold);
                }

I'll see if I can reproduce an issue with that when idle_threshold is exceeded.

fwsmit · 2023-04-17T12:22:02Z

I have it clear now. This issue was originally about lots of notifications idling and then updating their timestamps individually. Then it was fixed with #1140, but this introduced a regression that got confused for this issue. A separate issue (#1163) was also reported here and fixed afterwards.

So this issue is fixed by Floor timestamp to the second when displaying age #1140, but this introduced a regression.
The regression is now tracked in issue Regression by mixup of n->timestamp and n->start #1166
A separate issue High cpu usage when show_age_threshold = -1. #1163 also resulted in high cpu usage and was fixed in High CPU usage when show_age_threshold = -1 #1164

bakkeby · 2023-04-17T12:26:46Z

Just to follow I was not able to reproduce any issues specifically with regards to the show_age_threshold setting being > 0.

The use of n->timestamp in the context of the age threshold is fine as far as I can see because it is also used when calculating the age of the notification.

The way it is implemented now looks correct to me as if you have notifications pending in the queue that are older than the age threshold then those will start showing the age of the notification as soon as they are shown - rather than having to wait another 60 seconds or so before showing that the notification is more than three minutes old.

tobast added a commit to tobast/dunst that referenced this issue Feb 9, 2023

Floor timestamp to the second when displaying age

5ec12a7

Fixes dunst-project#1102

tobast mentioned this issue Feb 9, 2023

Floor timestamp to the second when displaying age #1140

Merged

tobast added a commit to tobast/dunst that referenced this issue Feb 10, 2023

Sleep until next turn of second to update age

6795da7

Fixes dunst-project#1102

tobast added a commit to tobast/dunst that referenced this issue Feb 10, 2023

Sleep until next turn of second to update age

cf4494e

Fixes dunst-project#1102

tobast added a commit to tobast/dunst that referenced this issue Feb 10, 2023

Sleep until next turn of second to update age

27d6d39

Fixes dunst-project#1102

fwsmit closed this as completed in 73c54bd Mar 8, 2023

gvolpe mentioned this issue Mar 31, 2023

dunst: revert to v1.9.0 gvolpe/nix-config#213

Merged

fwsmit reopened this Mar 31, 2023

biopsin mentioned this issue Apr 1, 2023

dunst: update to 1.9.2. void-linux/void-packages#42756

Merged

fwsmit added the Bug label Apr 3, 2023

tobast mentioned this issue Apr 4, 2023

Get_next_datachange: use start to compute timeout #1158

Merged

fwsmit changed the title ~~100% CPU usage while notification shown if idle and idle_treshold > 0~~ High CPU usage with show_age_threshold > 0 Apr 12, 2023

rcbellN51 mentioned this issue Apr 14, 2023

Dunst notifications flicker after unpausing (X11 + compositor) #1159

Open

fwsmit changed the title ~~High CPU usage with show_age_threshold > 0~~ High CPU usage when notifications are idle Apr 17, 2023

fwsmit changed the title ~~High CPU usage when notifications are idle~~ High CPU usage for updating notification timestamps Apr 17, 2023

fwsmit closed this as completed Apr 17, 2023

fwsmit mentioned this issue Apr 17, 2023

Regression by mixup of n->timestamp and n->start #1166

Closed

fwsmit mentioned this issue Apr 17, 2023

Add tests for issues with too many queue wakeups #1168

Open

3 tasks

High CPU usage for updating notification timestamps #1102

High CPU usage for updating notification timestamps #1102

Comments

mustaqimM commented Aug 20, 2022

Issue description

Installation info

fwsmit commented Aug 20, 2022

mustaqimM commented Aug 20, 2022

fwsmit commented Aug 20, 2022

mustaqimM commented Aug 21, 2022

jdrowell commented Feb 2, 2023

fwsmit commented Feb 2, 2023

jdrowell commented Feb 2, 2023

tobast commented Feb 8, 2023 • edited Loading

biopsin commented Mar 16, 2023 • edited Loading

tobast commented Mar 28, 2023

stephan49 commented Mar 30, 2023 • edited Loading

tobast commented Mar 31, 2023 • edited Loading

gvolpe commented Mar 31, 2023

fwsmit commented Mar 31, 2023

tobast commented Mar 31, 2023

fwsmit commented Mar 31, 2023

danniel3 commented Apr 3, 2023

fwsmit commented Apr 3, 2023

tobast commented Apr 3, 2023 • edited Loading

fwsmit commented Apr 3, 2023

bebehei commented Apr 3, 2023

jdrowell commented Apr 4, 2023

tobast commented Apr 4, 2023 • edited Loading

fwsmit commented Apr 4, 2023

bakkeby commented Apr 4, 2023

fwsmit commented Apr 4, 2023

bakkeby commented Apr 4, 2023

fwsmit commented Apr 5, 2023

biopsin commented Apr 7, 2023 • edited Loading

fifididi commented Apr 10, 2023 • edited Loading

bakkeby commented Apr 11, 2023

fwsmit commented Apr 12, 2023 • edited Loading

fwsmit commented Apr 12, 2023

fwsmit commented Apr 16, 2023

fifididi commented Apr 16, 2023 • edited Loading

bakkeby commented Apr 17, 2023

fwsmit commented Apr 17, 2023

bakkeby commented Apr 17, 2023

bakkeby commented Apr 17, 2023

fwsmit commented Apr 17, 2023 • edited Loading

bakkeby commented Apr 17, 2023

tobast commented Feb 8, 2023 •

edited

Loading

biopsin commented Mar 16, 2023 •

edited

Loading

stephan49 commented Mar 30, 2023 •

edited

Loading

tobast commented Mar 31, 2023 •

edited

Loading

tobast commented Apr 3, 2023 •

edited

Loading

tobast commented Apr 4, 2023 •

edited

Loading

biopsin commented Apr 7, 2023 •

edited

Loading

fifididi commented Apr 10, 2023 •

edited

Loading

fwsmit commented Apr 12, 2023 •

edited

Loading

fifididi commented Apr 16, 2023 •

edited

Loading

fwsmit commented Apr 17, 2023 •

edited

Loading