diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 00000000000000..a579bfcd655522
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,89 @@
+# CircleCI 2.0 pipeline: builds the documentation with `make docs` and
+# publishes the result to linux-kernel-labs.github.io, one directory per branch.
+version: 2
+# NOTE(review): `artifacts` is left empty (parses as null); confirm it is
+# still needed.
+general:
+  artifacts:
+
+jobs:
+  build:
+    docker:
+      - image: lktp/publish:0.2
+    # `environment` must be a mapping of NAME: value, not a NAME=value string.
+    environment:
+      GITHUB_IO: 'git@github.com:linux-kernel-labs/linux-kernel-labs.github.io.git'
+    steps:
+      # Reuse the .git directories saved by an earlier run, if any.
+      - restore_cache:
+          key: code-tree-shallow
+      - run:
+          name: checkout
+          command: |
+            set -x
+            mkdir -p ~/.ssh/
+            ssh-keyscan -H github.com >> ~/.ssh/known_hosts
+            git config --global user.email "circle.ci@kltp.org"
+            git config --global user.name "Circle CI"
+            if ! [ -d linux ]; then
+              git clone --depth=1 "$CIRCLE_REPOSITORY_URL";
+            fi
+            if cd linux; then
+              if [[ $CIRCLE_BRANCH == pull/* ]]; then
+                git fetch --depth=1 origin "$CIRCLE_BRANCH/head";
+              else
+                git fetch --depth=1 origin "$CIRCLE_BRANCH";
+              fi
+              git reset --hard "$CIRCLE_SHA1"
+              cd ..
+            fi
+            if ! [ -d linux-kernel-labs.github.io ]; then
+              git clone --depth=1 "$GITHUB_IO"
+            fi
+            if cd linux-kernel-labs.github.io; then
+              git fetch --depth=1 origin master
+              git reset --hard origin/master
+              cd ..
+            fi
+      # The epoch suffix gives every run a fresh key; restore_cache above
+      # matches on the `code-tree-shallow` prefix.
+      - save_cache:
+          key: code-tree-shallow-{{ epoch }}
+          paths:
+            - /home/ubuntu/project/linux/.git
+            - /home/ubuntu/project/linux-kernel-labs.github.io/.git
+      - run:
+          name: build
+          command: |
+            rm -rf linux/Documentation/output
+            cd linux/tools/labs && make docs
+      # `fingerprints` must be a list of key fingerprints.
+      - add_ssh_keys:
+          fingerprints:
+            - "8e:db:3c:ef:d0:4e:84:9f:78:46:e9:2f:1a:1e:32:81"
+      - run:
+          name: publish
+          command: |
+            set -x
+            if cd linux-kernel-labs.github.io; then
+              rm -rf "$CIRCLE_BRANCH"
+              mkdir -p "$CIRCLE_BRANCH"
+              cp -r ../linux/Documentation/output/teaching/* "$CIRCLE_BRANCH"/
+              git add "$CIRCLE_BRANCH"/
+              git commit --allow-empty -m "Publish $CIRCLE_BRANCH (built from ${CIRCLE_PULL_REQUEST:-$CIRCLE_BRANCH})"
+              git push
+              export GITHUB_SHA=$CIRCLE_SHA1
+              export GITHUB_USER=linux-kernel-labs
+              export GITHUB_REPO=$CIRCLE_PROJECT_REPONAME
+              export GITHUB_CONTEXT="ci/circleci: publish"
+              export GITHUB_TARGET_URL="http://linux-kernel-labs.github.io/$CIRCLE_BRANCH"
+              export GITHUB_DESCRIPTION="published at $GITHUB_TARGET_URL"
+              github-status-reporter --state success --debug
+              cd ..
+            fi
+
+workflows:
+  version: 2
+  build:
+    jobs:
+      - build
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 00000000000000..e36fc4d3214377
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,46 @@
+# Builds the documentation on pushes and pull requests targeting master,
+# uploads it as an artifact, and sends a `publish` dispatch event to the
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  build_job:
+    runs-on: ubuntu-latest
+    name: Build documentation
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Install native dependencies
+        run: sudo apt-get update -y && sudo apt-get install -y ditaa graphviz
+      - name: Install pip dependencies
+        run: sudo pip install Sphinx==1.6.7 sphinx_rtd_theme hieroglyph==1.0
+      - name: Build documentation
+        run: cd tools/labs && make docs
+      - uses: actions/upload-artifact@v1
+        with:
+          name: docs
+          path: Documentation/output/teaching
+      # SECURITY(review): HASH is a ROT13-obfuscated credential committed in
+      # plain sight; anyone can decode it. Rotate it and supply it through
+      # an encrypted secret instead.
+      - name: Publish linux-kernel-labs.github.io
+        env:
+          URL: https://api.github.com/repos/linux-kernel-labs/linux-kernel-labs.github.io/dispatches
+          HASH: yxg-obg:q8qqs27s1617p99n2p131s71n827npn1on4445q3
+        run: |
+          curl -X POST -u "$(tr a-zA-Z n-za-mN-ZA-M <<<"$HASH")" --header 'content-type: application/json' "$URL" \
+            --data '{"event_type" : "publish", "client_payload" : { "run_id" : "${{ github.run_id }}", "dir" : "${{ github.ref }}" } }'
+      # Same obfuscated credential as the publish step; see the note there.
+      - name: Comment on PR
+        if: github.event_name == 'pull_request'
+        env:
+          URL: ${{ github.event.pull_request.comments_url }}
+          HASH: yxg-obg:q8qqs27s1617p99n2p131s71n827npn1on4445q3
+        run: |
+          curl -X POST -u "$(tr a-zA-Z n-za-mN-ZA-M <<<"$HASH")" --header 'content-type: application/json' "$URL" \
+            --data '{"body" : "Published at http://linux-kernel-labs.github.io/${{ github.ref }}"}'
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 2ca77ad0f2388c..bbd498a4eada20 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -67,6 +67,9 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
 
 htmldocs:
 	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
+slides:
+	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,slides,$(var),,$(var)))
+
 linkcheckdocs:
 	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,linkcheck,$(var),,$(var)))
 
diff --git a/Documentation/conf.py b/Documentation/conf.py
index b691af4831fadc..bf2c9f0905159a 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -15,6 +15,9 @@
 import sys
 import os
 import sphinx
+import subprocess
+
+from distutils.version import LooseVersion
 
 # Get Sphinx version
 major, minor, patch = sphinx.version_info[:3]
@@ -42,6 +45,23 @@
 else:
     extensions.append("sphinx.ext.pngmath")
 
+# Enable the hieroglyph extension only when its command-line tool is
+# installed and reports a version newer than 1.0.0.
+try:
+    # universal_newlines=True makes check_output() return text rather than
+    # bytes on Python 3, where LooseVersion() cannot parse bytes.
+    hglyph_ver = subprocess.check_output(["hieroglyph", "--version"],
+                                         universal_newlines=True)
+    if LooseVersion(hglyph_ver) > LooseVersion("1.0.0"):
+        extensions.append('hieroglyph')
+except Exception:
+    # Best effort: hieroglyph is optional, so a failed probe only leaves
+    # the extension disabled.
+    pass
+
+extensions.append("ditaa")
+extensions.append("asciicast")
+
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 
@@ -183,11 +203,10 @@
 
 html_static_path = ['sphinx-static']
 
-html_context = {
-    'css_files': [
-        '_static/theme_overrides.css',
-    ],
-}
+def setup(app):
+    """Register the theme override stylesheet with the Sphinx application."""
+    app.add_stylesheet('theme_overrides.css')
+
 
 # Add any extra paths that contain custom files (such as robots.txt or
 # .htaccess) here, relative to this directory.
These files are copied diff --git a/Documentation/media/Makefile b/Documentation/media/Makefile index 36166952d555fb..8c60a977cfc0be 100644 --- a/Documentation/media/Makefile +++ b/Documentation/media/Makefile @@ -59,6 +59,7 @@ epub: all xml: all latex: $(IMGPDF) all linkcheck: +slides: all clean: -rm -f $(DOTTGT) $(IMGTGT) ${TARGETS} 2>/dev/null diff --git a/Documentation/sphinx-static/asciinema-player.css b/Documentation/sphinx-static/asciinema-player.css new file mode 100644 index 00000000000000..8d77df46e61285 --- /dev/null +++ b/Documentation/sphinx-static/asciinema-player.css @@ -0,0 +1,2563 @@ +.asciinema-player-wrapper { + position: relative; + text-align: center; + outline: none; +} +.asciinema-player-wrapper .title-bar { + display: none; + top: -78px; + transition: top 0.15s linear; + position: absolute; + left: 0; + right: 0; + box-sizing: content-box; + font-size: 20px; + line-height: 1em; + padding: 15px; + font-family: sans-serif; + color: white; + background-color: rgba(0, 0, 0, 0.8); +} +.asciinema-player-wrapper .title-bar img { + vertical-align: middle; + height: 48px; + margin-right: 16px; +} +.asciinema-player-wrapper .title-bar a { + color: white; + text-decoration: underline; +} +.asciinema-player-wrapper .title-bar a:hover { + text-decoration: none; +} +.asciinema-player-wrapper:fullscreen { + background-color: #000; + width: 100%; + height: 100%; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-justify-content: center; + justify-content: center; + -webkit-align-items: center; + align-items: center; +} +.asciinema-player-wrapper:fullscreen .asciinema-player { + position: static; +} +.asciinema-player-wrapper:fullscreen .title-bar { + display: initial; +} +.asciinema-player-wrapper:fullscreen.hud .title-bar { + top: 0; +} +.asciinema-player-wrapper:-webkit-full-screen { + background-color: #000; + width: 100%; + height: 100%; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-justify-content: 
center; + justify-content: center; + -webkit-align-items: center; + align-items: center; +} +.asciinema-player-wrapper:-webkit-full-screen .asciinema-player { + position: static; +} +.asciinema-player-wrapper:-webkit-full-screen .title-bar { + display: initial; +} +.asciinema-player-wrapper:-webkit-full-screen.hud .title-bar { + top: 0; +} +.asciinema-player-wrapper:-moz-full-screen { + background-color: #000; + width: 100%; + height: 100%; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-justify-content: center; + justify-content: center; + -webkit-align-items: center; + align-items: center; +} +.asciinema-player-wrapper:-moz-full-screen .asciinema-player { + position: static; +} +.asciinema-player-wrapper:-moz-full-screen .title-bar { + display: initial; +} +.asciinema-player-wrapper:-moz-full-screen.hud .title-bar { + top: 0; +} +.asciinema-player-wrapper:-ms-fullscreen { + background-color: #000; + width: 100%; + height: 100%; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-justify-content: center; + justify-content: center; + -webkit-align-items: center; + align-items: center; +} +.asciinema-player-wrapper:-ms-fullscreen .asciinema-player { + position: static; +} +.asciinema-player-wrapper:-ms-fullscreen .title-bar { + display: initial; +} +.asciinema-player-wrapper:-ms-fullscreen.hud .title-bar { + top: 0; +} +.asciinema-player-wrapper .asciinema-player { + text-align: left; + display: inline-block; + padding: 0px; + position: relative; + box-sizing: content-box; + -moz-box-sizing: content-box; + -webkit-box-sizing: content-box; + overflow: hidden; + max-width: 100%; +} +.asciinema-terminal { + box-sizing: content-box; + -moz-box-sizing: content-box; + -webkit-box-sizing: content-box; + overflow: hidden; + padding: 0; + margin: 0px; + display: block; + white-space: pre; + border: 0; + word-wrap: normal; + word-break: normal; + border-radius: 0; + border-style: solid; + cursor: text; + border-width: 0.5em; 
+ font-family: Consolas, Menlo, 'Bitstream Vera Sans Mono', monospace, 'Powerline Symbols'; + line-height: 1.3333333333em; +} +.asciinema-terminal .line { + letter-spacing: normal; + overflow: hidden; + height: 1.3333333333em; +} +.asciinema-terminal .line span { + padding: 0; + display: inline-block; + height: 1.3333333333em; +} +.asciinema-terminal .line { + display: block; + width: 200%; +} +.asciinema-terminal .bright { + font-weight: bold; +} +.asciinema-terminal .underline { + text-decoration: underline; +} +.asciinema-terminal .italic { + font-style: italic; +} +.asciinema-terminal.font-small { + font-size: 12px; +} +.asciinema-terminal.font-medium { + font-size: 18px; +} +.asciinema-terminal.font-big { + font-size: 24px; +} +.asciinema-player .control-bar { + width: 100%; + height: 32px; + background: rgba(0, 0, 0, 0.8); + /* no gradient fallback */ + background: -moz-linear-gradient(top, rgba(0, 0, 0, 0.5) 0%, #000000 25%, #000000 100%); + /* FF3.6-15 */ + background: -webkit-linear-gradient(top, rgba(0, 0, 0, 0.5) 0%, #000000 25%, #000000 100%); + /* Chrome10-25,Safari5.1-6 */ + background: linear-gradient(to bottom, rgba(0, 0, 0, 0.5) 0%, #000000 25%, #000000 100%); + /* W3C, IE10+, FF16+, Chrome26+, Opera12+, Safari7+ */ + color: #bbbbbb; + box-sizing: content-box; + line-height: 1; + position: absolute; + bottom: -35px; + left: 0; + transition: bottom 0.15s linear; +} +.asciinema-player .control-bar * { + box-sizing: inherit; + font-size: 0; +} +.asciinema-player .control-bar svg.icon path { + fill: #bbbbbb; +} +.asciinema-player .control-bar .playback-button { + display: block; + float: left; + cursor: pointer; + height: 12px; + width: 12px; + padding: 10px; +} +.asciinema-player .control-bar .playback-button svg { + height: 12px; + width: 12px; +} +.asciinema-player .control-bar .timer { + display: block; + float: left; + width: 50px; + height: 100%; + text-align: center; + font-family: Helvetica, Arial, sans-serif; + font-size: 11px; + font-weight: 
bold; + line-height: 32px; + cursor: default; +} +.asciinema-player .control-bar .timer span { + display: inline-block; + font-size: inherit; +} +.asciinema-player .control-bar .timer .time-remaining { + display: none; +} +.asciinema-player .control-bar .timer:hover .time-elapsed { + display: none; +} +.asciinema-player .control-bar .timer:hover .time-remaining { + display: inline; +} +.asciinema-player .control-bar .progressbar { + display: block; + overflow: hidden; + height: 100%; + padding: 0 10px; +} +.asciinema-player .control-bar .progressbar .bar { + display: block; + cursor: pointer; + height: 100%; + padding-top: 15px; + font-size: 0; +} +.asciinema-player .control-bar .progressbar .bar .gutter { + display: block; + height: 3px; + background-color: #333; +} +.asciinema-player .control-bar .progressbar .bar .gutter span { + display: inline-block; + height: 100%; + background-color: #bbbbbb; + border-radius: 3px; +} +.asciinema-player .control-bar.live .progressbar .bar { + cursor: default; +} +.asciinema-player .control-bar .fullscreen-button { + display: block; + float: right; + width: 14px; + height: 14px; + padding: 9px; + cursor: pointer; +} +.asciinema-player .control-bar .fullscreen-button svg { + width: 14px; + height: 14px; +} +.asciinema-player .control-bar .fullscreen-button svg:first-child { + display: inline; +} +.asciinema-player .control-bar .fullscreen-button svg:last-child { + display: none; +} +.asciinema-player-wrapper.hud .control-bar { + bottom: 0px; +} +.asciinema-player-wrapper:fullscreen .fullscreen-button svg:first-child { + display: none; +} +.asciinema-player-wrapper:fullscreen .fullscreen-button svg:last-child { + display: inline; +} +.asciinema-player-wrapper:-webkit-full-screen .fullscreen-button svg:first-child { + display: none; +} +.asciinema-player-wrapper:-webkit-full-screen .fullscreen-button svg:last-child { + display: inline; +} +.asciinema-player-wrapper:-moz-full-screen .fullscreen-button svg:first-child { + display: 
none; +} +.asciinema-player-wrapper:-moz-full-screen .fullscreen-button svg:last-child { + display: inline; +} +.asciinema-player-wrapper:-ms-fullscreen .fullscreen-button svg:first-child { + display: none; +} +.asciinema-player-wrapper:-ms-fullscreen .fullscreen-button svg:last-child { + display: inline; +} +.asciinema-player .loading { + z-index: 10; + background-repeat: no-repeat; + background-position: center; + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 32px; + background-color: rgba(0, 0, 0, 0.5); +} +.asciinema-player .start-prompt { + z-index: 10; + background-repeat: no-repeat; + background-position: center; + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 32px; + z-index: 20; + cursor: pointer; +} +.asciinema-player .start-prompt .play-button { + font-size: 0px; +} +.asciinema-player .start-prompt .play-button { + position: absolute; + left: 0; + top: 0; + right: 0; + bottom: 0; + text-align: center; + color: white; + display: table; + width: 100%; + height: 100%; +} +.asciinema-player .start-prompt .play-button div { + vertical-align: middle; + display: table-cell; +} +.asciinema-player .start-prompt .play-button div span { + width: 96px; + height: 96px; + display: inline-block; +} +@-webkit-keyframes expand { + 0% { + -webkit-transform: scale(0); + } + 50% { + -webkit-transform: scale(1); + } + 100% { + z-index: 1; + } +} +@-moz-keyframes expand { + 0% { + -moz-transform: scale(0); + } + 50% { + -moz-transform: scale(1); + } + 100% { + z-index: 1; + } +} +@-o-keyframes expand { + 0% { + -o-transform: scale(0); + } + 50% { + -o-transform: scale(1); + } + 100% { + z-index: 1; + } +} +@keyframes expand { + 0% { + transform: scale(0); + } + 50% { + transform: scale(1); + } + 100% { + z-index: 1; + } +} +.loader { + position: absolute; + left: 50%; + top: 50%; + margin: -20px 0 0 -20px; + background-color: white; + border-radius: 50%; + box-shadow: 0 0 0 6.66667px #141414; + width: 40px; + height: 40px; +} +.loader:before, 
+.loader:after { + content: ""; + position: absolute; + left: 50%; + top: 50%; + display: block; + margin: -21px 0 0 -21px; + border-radius: 50%; + z-index: 2; + width: 42px; + height: 42px; +} +.loader:before { + background-color: #141414; + -webkit-animation: expand 1.6s linear infinite both; + -moz-animation: expand 1.6s linear infinite both; + animation: expand 1.6s linear infinite both; +} +.loader:after { + background-color: white; + -webkit-animation: expand 1.6s linear 0.8s infinite both; + -moz-animation: expand 1.6s linear 0.8s infinite both; + animation: expand 1.6s linear 0.8s infinite both; +} +.asciinema-terminal .fg-16 { + color: #000000; +} +.asciinema-terminal .bg-16 { + background-color: #000000; +} +.asciinema-terminal .fg-17 { + color: #00005f; +} +.asciinema-terminal .bg-17 { + background-color: #00005f; +} +.asciinema-terminal .fg-18 { + color: #000087; +} +.asciinema-terminal .bg-18 { + background-color: #000087; +} +.asciinema-terminal .fg-19 { + color: #0000af; +} +.asciinema-terminal .bg-19 { + background-color: #0000af; +} +.asciinema-terminal .fg-20 { + color: #0000d7; +} +.asciinema-terminal .bg-20 { + background-color: #0000d7; +} +.asciinema-terminal .fg-21 { + color: #0000ff; +} +.asciinema-terminal .bg-21 { + background-color: #0000ff; +} +.asciinema-terminal .fg-22 { + color: #005f00; +} +.asciinema-terminal .bg-22 { + background-color: #005f00; +} +.asciinema-terminal .fg-23 { + color: #005f5f; +} +.asciinema-terminal .bg-23 { + background-color: #005f5f; +} +.asciinema-terminal .fg-24 { + color: #005f87; +} +.asciinema-terminal .bg-24 { + background-color: #005f87; +} +.asciinema-terminal .fg-25 { + color: #005faf; +} +.asciinema-terminal .bg-25 { + background-color: #005faf; +} +.asciinema-terminal .fg-26 { + color: #005fd7; +} +.asciinema-terminal .bg-26 { + background-color: #005fd7; +} +.asciinema-terminal .fg-27 { + color: #005fff; +} +.asciinema-terminal .bg-27 { + background-color: #005fff; +} +.asciinema-terminal .fg-28 { 
+ color: #008700; +} +.asciinema-terminal .bg-28 { + background-color: #008700; +} +.asciinema-terminal .fg-29 { + color: #00875f; +} +.asciinema-terminal .bg-29 { + background-color: #00875f; +} +.asciinema-terminal .fg-30 { + color: #008787; +} +.asciinema-terminal .bg-30 { + background-color: #008787; +} +.asciinema-terminal .fg-31 { + color: #0087af; +} +.asciinema-terminal .bg-31 { + background-color: #0087af; +} +.asciinema-terminal .fg-32 { + color: #0087d7; +} +.asciinema-terminal .bg-32 { + background-color: #0087d7; +} +.asciinema-terminal .fg-33 { + color: #0087ff; +} +.asciinema-terminal .bg-33 { + background-color: #0087ff; +} +.asciinema-terminal .fg-34 { + color: #00af00; +} +.asciinema-terminal .bg-34 { + background-color: #00af00; +} +.asciinema-terminal .fg-35 { + color: #00af5f; +} +.asciinema-terminal .bg-35 { + background-color: #00af5f; +} +.asciinema-terminal .fg-36 { + color: #00af87; +} +.asciinema-terminal .bg-36 { + background-color: #00af87; +} +.asciinema-terminal .fg-37 { + color: #00afaf; +} +.asciinema-terminal .bg-37 { + background-color: #00afaf; +} +.asciinema-terminal .fg-38 { + color: #00afd7; +} +.asciinema-terminal .bg-38 { + background-color: #00afd7; +} +.asciinema-terminal .fg-39 { + color: #00afff; +} +.asciinema-terminal .bg-39 { + background-color: #00afff; +} +.asciinema-terminal .fg-40 { + color: #00d700; +} +.asciinema-terminal .bg-40 { + background-color: #00d700; +} +.asciinema-terminal .fg-41 { + color: #00d75f; +} +.asciinema-terminal .bg-41 { + background-color: #00d75f; +} +.asciinema-terminal .fg-42 { + color: #00d787; +} +.asciinema-terminal .bg-42 { + background-color: #00d787; +} +.asciinema-terminal .fg-43 { + color: #00d7af; +} +.asciinema-terminal .bg-43 { + background-color: #00d7af; +} +.asciinema-terminal .fg-44 { + color: #00d7d7; +} +.asciinema-terminal .bg-44 { + background-color: #00d7d7; +} +.asciinema-terminal .fg-45 { + color: #00d7ff; +} +.asciinema-terminal .bg-45 { + background-color: 
#00d7ff; +} +.asciinema-terminal .fg-46 { + color: #00ff00; +} +.asciinema-terminal .bg-46 { + background-color: #00ff00; +} +.asciinema-terminal .fg-47 { + color: #00ff5f; +} +.asciinema-terminal .bg-47 { + background-color: #00ff5f; +} +.asciinema-terminal .fg-48 { + color: #00ff87; +} +.asciinema-terminal .bg-48 { + background-color: #00ff87; +} +.asciinema-terminal .fg-49 { + color: #00ffaf; +} +.asciinema-terminal .bg-49 { + background-color: #00ffaf; +} +.asciinema-terminal .fg-50 { + color: #00ffd7; +} +.asciinema-terminal .bg-50 { + background-color: #00ffd7; +} +.asciinema-terminal .fg-51 { + color: #00ffff; +} +.asciinema-terminal .bg-51 { + background-color: #00ffff; +} +.asciinema-terminal .fg-52 { + color: #5f0000; +} +.asciinema-terminal .bg-52 { + background-color: #5f0000; +} +.asciinema-terminal .fg-53 { + color: #5f005f; +} +.asciinema-terminal .bg-53 { + background-color: #5f005f; +} +.asciinema-terminal .fg-54 { + color: #5f0087; +} +.asciinema-terminal .bg-54 { + background-color: #5f0087; +} +.asciinema-terminal .fg-55 { + color: #5f00af; +} +.asciinema-terminal .bg-55 { + background-color: #5f00af; +} +.asciinema-terminal .fg-56 { + color: #5f00d7; +} +.asciinema-terminal .bg-56 { + background-color: #5f00d7; +} +.asciinema-terminal .fg-57 { + color: #5f00ff; +} +.asciinema-terminal .bg-57 { + background-color: #5f00ff; +} +.asciinema-terminal .fg-58 { + color: #5f5f00; +} +.asciinema-terminal .bg-58 { + background-color: #5f5f00; +} +.asciinema-terminal .fg-59 { + color: #5f5f5f; +} +.asciinema-terminal .bg-59 { + background-color: #5f5f5f; +} +.asciinema-terminal .fg-60 { + color: #5f5f87; +} +.asciinema-terminal .bg-60 { + background-color: #5f5f87; +} +.asciinema-terminal .fg-61 { + color: #5f5faf; +} +.asciinema-terminal .bg-61 { + background-color: #5f5faf; +} +.asciinema-terminal .fg-62 { + color: #5f5fd7; +} +.asciinema-terminal .bg-62 { + background-color: #5f5fd7; +} +.asciinema-terminal .fg-63 { + color: #5f5fff; +} 
+.asciinema-terminal .bg-63 { + background-color: #5f5fff; +} +.asciinema-terminal .fg-64 { + color: #5f8700; +} +.asciinema-terminal .bg-64 { + background-color: #5f8700; +} +.asciinema-terminal .fg-65 { + color: #5f875f; +} +.asciinema-terminal .bg-65 { + background-color: #5f875f; +} +.asciinema-terminal .fg-66 { + color: #5f8787; +} +.asciinema-terminal .bg-66 { + background-color: #5f8787; +} +.asciinema-terminal .fg-67 { + color: #5f87af; +} +.asciinema-terminal .bg-67 { + background-color: #5f87af; +} +.asciinema-terminal .fg-68 { + color: #5f87d7; +} +.asciinema-terminal .bg-68 { + background-color: #5f87d7; +} +.asciinema-terminal .fg-69 { + color: #5f87ff; +} +.asciinema-terminal .bg-69 { + background-color: #5f87ff; +} +.asciinema-terminal .fg-70 { + color: #5faf00; +} +.asciinema-terminal .bg-70 { + background-color: #5faf00; +} +.asciinema-terminal .fg-71 { + color: #5faf5f; +} +.asciinema-terminal .bg-71 { + background-color: #5faf5f; +} +.asciinema-terminal .fg-72 { + color: #5faf87; +} +.asciinema-terminal .bg-72 { + background-color: #5faf87; +} +.asciinema-terminal .fg-73 { + color: #5fafaf; +} +.asciinema-terminal .bg-73 { + background-color: #5fafaf; +} +.asciinema-terminal .fg-74 { + color: #5fafd7; +} +.asciinema-terminal .bg-74 { + background-color: #5fafd7; +} +.asciinema-terminal .fg-75 { + color: #5fafff; +} +.asciinema-terminal .bg-75 { + background-color: #5fafff; +} +.asciinema-terminal .fg-76 { + color: #5fd700; +} +.asciinema-terminal .bg-76 { + background-color: #5fd700; +} +.asciinema-terminal .fg-77 { + color: #5fd75f; +} +.asciinema-terminal .bg-77 { + background-color: #5fd75f; +} +.asciinema-terminal .fg-78 { + color: #5fd787; +} +.asciinema-terminal .bg-78 { + background-color: #5fd787; +} +.asciinema-terminal .fg-79 { + color: #5fd7af; +} +.asciinema-terminal .bg-79 { + background-color: #5fd7af; +} +.asciinema-terminal .fg-80 { + color: #5fd7d7; +} +.asciinema-terminal .bg-80 { + background-color: #5fd7d7; +} 
+.asciinema-terminal .fg-81 { + color: #5fd7ff; +} +.asciinema-terminal .bg-81 { + background-color: #5fd7ff; +} +.asciinema-terminal .fg-82 { + color: #5fff00; +} +.asciinema-terminal .bg-82 { + background-color: #5fff00; +} +.asciinema-terminal .fg-83 { + color: #5fff5f; +} +.asciinema-terminal .bg-83 { + background-color: #5fff5f; +} +.asciinema-terminal .fg-84 { + color: #5fff87; +} +.asciinema-terminal .bg-84 { + background-color: #5fff87; +} +.asciinema-terminal .fg-85 { + color: #5fffaf; +} +.asciinema-terminal .bg-85 { + background-color: #5fffaf; +} +.asciinema-terminal .fg-86 { + color: #5fffd7; +} +.asciinema-terminal .bg-86 { + background-color: #5fffd7; +} +.asciinema-terminal .fg-87 { + color: #5fffff; +} +.asciinema-terminal .bg-87 { + background-color: #5fffff; +} +.asciinema-terminal .fg-88 { + color: #870000; +} +.asciinema-terminal .bg-88 { + background-color: #870000; +} +.asciinema-terminal .fg-89 { + color: #87005f; +} +.asciinema-terminal .bg-89 { + background-color: #87005f; +} +.asciinema-terminal .fg-90 { + color: #870087; +} +.asciinema-terminal .bg-90 { + background-color: #870087; +} +.asciinema-terminal .fg-91 { + color: #8700af; +} +.asciinema-terminal .bg-91 { + background-color: #8700af; +} +.asciinema-terminal .fg-92 { + color: #8700d7; +} +.asciinema-terminal .bg-92 { + background-color: #8700d7; +} +.asciinema-terminal .fg-93 { + color: #8700ff; +} +.asciinema-terminal .bg-93 { + background-color: #8700ff; +} +.asciinema-terminal .fg-94 { + color: #875f00; +} +.asciinema-terminal .bg-94 { + background-color: #875f00; +} +.asciinema-terminal .fg-95 { + color: #875f5f; +} +.asciinema-terminal .bg-95 { + background-color: #875f5f; +} +.asciinema-terminal .fg-96 { + color: #875f87; +} +.asciinema-terminal .bg-96 { + background-color: #875f87; +} +.asciinema-terminal .fg-97 { + color: #875faf; +} +.asciinema-terminal .bg-97 { + background-color: #875faf; +} +.asciinema-terminal .fg-98 { + color: #875fd7; +} +.asciinema-terminal .bg-98 
{ + background-color: #875fd7; +} +.asciinema-terminal .fg-99 { + color: #875fff; +} +.asciinema-terminal .bg-99 { + background-color: #875fff; +} +.asciinema-terminal .fg-100 { + color: #878700; +} +.asciinema-terminal .bg-100 { + background-color: #878700; +} +.asciinema-terminal .fg-101 { + color: #87875f; +} +.asciinema-terminal .bg-101 { + background-color: #87875f; +} +.asciinema-terminal .fg-102 { + color: #878787; +} +.asciinema-terminal .bg-102 { + background-color: #878787; +} +.asciinema-terminal .fg-103 { + color: #8787af; +} +.asciinema-terminal .bg-103 { + background-color: #8787af; +} +.asciinema-terminal .fg-104 { + color: #8787d7; +} +.asciinema-terminal .bg-104 { + background-color: #8787d7; +} +.asciinema-terminal .fg-105 { + color: #8787ff; +} +.asciinema-terminal .bg-105 { + background-color: #8787ff; +} +.asciinema-terminal .fg-106 { + color: #87af00; +} +.asciinema-terminal .bg-106 { + background-color: #87af00; +} +.asciinema-terminal .fg-107 { + color: #87af5f; +} +.asciinema-terminal .bg-107 { + background-color: #87af5f; +} +.asciinema-terminal .fg-108 { + color: #87af87; +} +.asciinema-terminal .bg-108 { + background-color: #87af87; +} +.asciinema-terminal .fg-109 { + color: #87afaf; +} +.asciinema-terminal .bg-109 { + background-color: #87afaf; +} +.asciinema-terminal .fg-110 { + color: #87afd7; +} +.asciinema-terminal .bg-110 { + background-color: #87afd7; +} +.asciinema-terminal .fg-111 { + color: #87afff; +} +.asciinema-terminal .bg-111 { + background-color: #87afff; +} +.asciinema-terminal .fg-112 { + color: #87d700; +} +.asciinema-terminal .bg-112 { + background-color: #87d700; +} +.asciinema-terminal .fg-113 { + color: #87d75f; +} +.asciinema-terminal .bg-113 { + background-color: #87d75f; +} +.asciinema-terminal .fg-114 { + color: #87d787; +} +.asciinema-terminal .bg-114 { + background-color: #87d787; +} +.asciinema-terminal .fg-115 { + color: #87d7af; +} +.asciinema-terminal .bg-115 { + background-color: #87d7af; +} 
+.asciinema-terminal .fg-116 { + color: #87d7d7; +} +.asciinema-terminal .bg-116 { + background-color: #87d7d7; +} +.asciinema-terminal .fg-117 { + color: #87d7ff; +} +.asciinema-terminal .bg-117 { + background-color: #87d7ff; +} +.asciinema-terminal .fg-118 { + color: #87ff00; +} +.asciinema-terminal .bg-118 { + background-color: #87ff00; +} +.asciinema-terminal .fg-119 { + color: #87ff5f; +} +.asciinema-terminal .bg-119 { + background-color: #87ff5f; +} +.asciinema-terminal .fg-120 { + color: #87ff87; +} +.asciinema-terminal .bg-120 { + background-color: #87ff87; +} +.asciinema-terminal .fg-121 { + color: #87ffaf; +} +.asciinema-terminal .bg-121 { + background-color: #87ffaf; +} +.asciinema-terminal .fg-122 { + color: #87ffd7; +} +.asciinema-terminal .bg-122 { + background-color: #87ffd7; +} +.asciinema-terminal .fg-123 { + color: #87ffff; +} +.asciinema-terminal .bg-123 { + background-color: #87ffff; +} +.asciinema-terminal .fg-124 { + color: #af0000; +} +.asciinema-terminal .bg-124 { + background-color: #af0000; +} +.asciinema-terminal .fg-125 { + color: #af005f; +} +.asciinema-terminal .bg-125 { + background-color: #af005f; +} +.asciinema-terminal .fg-126 { + color: #af0087; +} +.asciinema-terminal .bg-126 { + background-color: #af0087; +} +.asciinema-terminal .fg-127 { + color: #af00af; +} +.asciinema-terminal .bg-127 { + background-color: #af00af; +} +.asciinema-terminal .fg-128 { + color: #af00d7; +} +.asciinema-terminal .bg-128 { + background-color: #af00d7; +} +.asciinema-terminal .fg-129 { + color: #af00ff; +} +.asciinema-terminal .bg-129 { + background-color: #af00ff; +} +.asciinema-terminal .fg-130 { + color: #af5f00; +} +.asciinema-terminal .bg-130 { + background-color: #af5f00; +} +.asciinema-terminal .fg-131 { + color: #af5f5f; +} +.asciinema-terminal .bg-131 { + background-color: #af5f5f; +} +.asciinema-terminal .fg-132 { + color: #af5f87; +} +.asciinema-terminal .bg-132 { + background-color: #af5f87; +} +.asciinema-terminal .fg-133 { + color: 
#af5faf; +} +.asciinema-terminal .bg-133 { + background-color: #af5faf; +} +.asciinema-terminal .fg-134 { + color: #af5fd7; +} +.asciinema-terminal .bg-134 { + background-color: #af5fd7; +} +.asciinema-terminal .fg-135 { + color: #af5fff; +} +.asciinema-terminal .bg-135 { + background-color: #af5fff; +} +.asciinema-terminal .fg-136 { + color: #af8700; +} +.asciinema-terminal .bg-136 { + background-color: #af8700; +} +.asciinema-terminal .fg-137 { + color: #af875f; +} +.asciinema-terminal .bg-137 { + background-color: #af875f; +} +.asciinema-terminal .fg-138 { + color: #af8787; +} +.asciinema-terminal .bg-138 { + background-color: #af8787; +} +.asciinema-terminal .fg-139 { + color: #af87af; +} +.asciinema-terminal .bg-139 { + background-color: #af87af; +} +.asciinema-terminal .fg-140 { + color: #af87d7; +} +.asciinema-terminal .bg-140 { + background-color: #af87d7; +} +.asciinema-terminal .fg-141 { + color: #af87ff; +} +.asciinema-terminal .bg-141 { + background-color: #af87ff; +} +.asciinema-terminal .fg-142 { + color: #afaf00; +} +.asciinema-terminal .bg-142 { + background-color: #afaf00; +} +.asciinema-terminal .fg-143 { + color: #afaf5f; +} +.asciinema-terminal .bg-143 { + background-color: #afaf5f; +} +.asciinema-terminal .fg-144 { + color: #afaf87; +} +.asciinema-terminal .bg-144 { + background-color: #afaf87; +} +.asciinema-terminal .fg-145 { + color: #afafaf; +} +.asciinema-terminal .bg-145 { + background-color: #afafaf; +} +.asciinema-terminal .fg-146 { + color: #afafd7; +} +.asciinema-terminal .bg-146 { + background-color: #afafd7; +} +.asciinema-terminal .fg-147 { + color: #afafff; +} +.asciinema-terminal .bg-147 { + background-color: #afafff; +} +.asciinema-terminal .fg-148 { + color: #afd700; +} +.asciinema-terminal .bg-148 { + background-color: #afd700; +} +.asciinema-terminal .fg-149 { + color: #afd75f; +} +.asciinema-terminal .bg-149 { + background-color: #afd75f; +} +.asciinema-terminal .fg-150 { + color: #afd787; +} +.asciinema-terminal .bg-150 { + 
background-color: #afd787; +} +.asciinema-terminal .fg-151 { + color: #afd7af; +} +.asciinema-terminal .bg-151 { + background-color: #afd7af; +} +.asciinema-terminal .fg-152 { + color: #afd7d7; +} +.asciinema-terminal .bg-152 { + background-color: #afd7d7; +} +.asciinema-terminal .fg-153 { + color: #afd7ff; +} +.asciinema-terminal .bg-153 { + background-color: #afd7ff; +} +.asciinema-terminal .fg-154 { + color: #afff00; +} +.asciinema-terminal .bg-154 { + background-color: #afff00; +} +.asciinema-terminal .fg-155 { + color: #afff5f; +} +.asciinema-terminal .bg-155 { + background-color: #afff5f; +} +.asciinema-terminal .fg-156 { + color: #afff87; +} +.asciinema-terminal .bg-156 { + background-color: #afff87; +} +.asciinema-terminal .fg-157 { + color: #afffaf; +} +.asciinema-terminal .bg-157 { + background-color: #afffaf; +} +.asciinema-terminal .fg-158 { + color: #afffd7; +} +.asciinema-terminal .bg-158 { + background-color: #afffd7; +} +.asciinema-terminal .fg-159 { + color: #afffff; +} +.asciinema-terminal .bg-159 { + background-color: #afffff; +} +.asciinema-terminal .fg-160 { + color: #d70000; +} +.asciinema-terminal .bg-160 { + background-color: #d70000; +} +.asciinema-terminal .fg-161 { + color: #d7005f; +} +.asciinema-terminal .bg-161 { + background-color: #d7005f; +} +.asciinema-terminal .fg-162 { + color: #d70087; +} +.asciinema-terminal .bg-162 { + background-color: #d70087; +} +.asciinema-terminal .fg-163 { + color: #d700af; +} +.asciinema-terminal .bg-163 { + background-color: #d700af; +} +.asciinema-terminal .fg-164 { + color: #d700d7; +} +.asciinema-terminal .bg-164 { + background-color: #d700d7; +} +.asciinema-terminal .fg-165 { + color: #d700ff; +} +.asciinema-terminal .bg-165 { + background-color: #d700ff; +} +.asciinema-terminal .fg-166 { + color: #d75f00; +} +.asciinema-terminal .bg-166 { + background-color: #d75f00; +} +.asciinema-terminal .fg-167 { + color: #d75f5f; +} +.asciinema-terminal .bg-167 { + background-color: #d75f5f; +} 
+.asciinema-terminal .fg-168 { + color: #d75f87; +} +.asciinema-terminal .bg-168 { + background-color: #d75f87; +} +.asciinema-terminal .fg-169 { + color: #d75faf; +} +.asciinema-terminal .bg-169 { + background-color: #d75faf; +} +.asciinema-terminal .fg-170 { + color: #d75fd7; +} +.asciinema-terminal .bg-170 { + background-color: #d75fd7; +} +.asciinema-terminal .fg-171 { + color: #d75fff; +} +.asciinema-terminal .bg-171 { + background-color: #d75fff; +} +.asciinema-terminal .fg-172 { + color: #d78700; +} +.asciinema-terminal .bg-172 { + background-color: #d78700; +} +.asciinema-terminal .fg-173 { + color: #d7875f; +} +.asciinema-terminal .bg-173 { + background-color: #d7875f; +} +.asciinema-terminal .fg-174 { + color: #d78787; +} +.asciinema-terminal .bg-174 { + background-color: #d78787; +} +.asciinema-terminal .fg-175 { + color: #d787af; +} +.asciinema-terminal .bg-175 { + background-color: #d787af; +} +.asciinema-terminal .fg-176 { + color: #d787d7; +} +.asciinema-terminal .bg-176 { + background-color: #d787d7; +} +.asciinema-terminal .fg-177 { + color: #d787ff; +} +.asciinema-terminal .bg-177 { + background-color: #d787ff; +} +.asciinema-terminal .fg-178 { + color: #d7af00; +} +.asciinema-terminal .bg-178 { + background-color: #d7af00; +} +.asciinema-terminal .fg-179 { + color: #d7af5f; +} +.asciinema-terminal .bg-179 { + background-color: #d7af5f; +} +.asciinema-terminal .fg-180 { + color: #d7af87; +} +.asciinema-terminal .bg-180 { + background-color: #d7af87; +} +.asciinema-terminal .fg-181 { + color: #d7afaf; +} +.asciinema-terminal .bg-181 { + background-color: #d7afaf; +} +.asciinema-terminal .fg-182 { + color: #d7afd7; +} +.asciinema-terminal .bg-182 { + background-color: #d7afd7; +} +.asciinema-terminal .fg-183 { + color: #d7afff; +} +.asciinema-terminal .bg-183 { + background-color: #d7afff; +} +.asciinema-terminal .fg-184 { + color: #d7d700; +} +.asciinema-terminal .bg-184 { + background-color: #d7d700; +} +.asciinema-terminal .fg-185 { + color: 
#d7d75f; +} +.asciinema-terminal .bg-185 { + background-color: #d7d75f; +} +.asciinema-terminal .fg-186 { + color: #d7d787; +} +.asciinema-terminal .bg-186 { + background-color: #d7d787; +} +.asciinema-terminal .fg-187 { + color: #d7d7af; +} +.asciinema-terminal .bg-187 { + background-color: #d7d7af; +} +.asciinema-terminal .fg-188 { + color: #d7d7d7; +} +.asciinema-terminal .bg-188 { + background-color: #d7d7d7; +} +.asciinema-terminal .fg-189 { + color: #d7d7ff; +} +.asciinema-terminal .bg-189 { + background-color: #d7d7ff; +} +.asciinema-terminal .fg-190 { + color: #d7ff00; +} +.asciinema-terminal .bg-190 { + background-color: #d7ff00; +} +.asciinema-terminal .fg-191 { + color: #d7ff5f; +} +.asciinema-terminal .bg-191 { + background-color: #d7ff5f; +} +.asciinema-terminal .fg-192 { + color: #d7ff87; +} +.asciinema-terminal .bg-192 { + background-color: #d7ff87; +} +.asciinema-terminal .fg-193 { + color: #d7ffaf; +} +.asciinema-terminal .bg-193 { + background-color: #d7ffaf; +} +.asciinema-terminal .fg-194 { + color: #d7ffd7; +} +.asciinema-terminal .bg-194 { + background-color: #d7ffd7; +} +.asciinema-terminal .fg-195 { + color: #d7ffff; +} +.asciinema-terminal .bg-195 { + background-color: #d7ffff; +} +.asciinema-terminal .fg-196 { + color: #ff0000; +} +.asciinema-terminal .bg-196 { + background-color: #ff0000; +} +.asciinema-terminal .fg-197 { + color: #ff005f; +} +.asciinema-terminal .bg-197 { + background-color: #ff005f; +} +.asciinema-terminal .fg-198 { + color: #ff0087; +} +.asciinema-terminal .bg-198 { + background-color: #ff0087; +} +.asciinema-terminal .fg-199 { + color: #ff00af; +} +.asciinema-terminal .bg-199 { + background-color: #ff00af; +} +.asciinema-terminal .fg-200 { + color: #ff00d7; +} +.asciinema-terminal .bg-200 { + background-color: #ff00d7; +} +.asciinema-terminal .fg-201 { + color: #ff00ff; +} +.asciinema-terminal .bg-201 { + background-color: #ff00ff; +} +.asciinema-terminal .fg-202 { + color: #ff5f00; +} +.asciinema-terminal .bg-202 { + 
background-color: #ff5f00; +} +.asciinema-terminal .fg-203 { + color: #ff5f5f; +} +.asciinema-terminal .bg-203 { + background-color: #ff5f5f; +} +.asciinema-terminal .fg-204 { + color: #ff5f87; +} +.asciinema-terminal .bg-204 { + background-color: #ff5f87; +} +.asciinema-terminal .fg-205 { + color: #ff5faf; +} +.asciinema-terminal .bg-205 { + background-color: #ff5faf; +} +.asciinema-terminal .fg-206 { + color: #ff5fd7; +} +.asciinema-terminal .bg-206 { + background-color: #ff5fd7; +} +.asciinema-terminal .fg-207 { + color: #ff5fff; +} +.asciinema-terminal .bg-207 { + background-color: #ff5fff; +} +.asciinema-terminal .fg-208 { + color: #ff8700; +} +.asciinema-terminal .bg-208 { + background-color: #ff8700; +} +.asciinema-terminal .fg-209 { + color: #ff875f; +} +.asciinema-terminal .bg-209 { + background-color: #ff875f; +} +.asciinema-terminal .fg-210 { + color: #ff8787; +} +.asciinema-terminal .bg-210 { + background-color: #ff8787; +} +.asciinema-terminal .fg-211 { + color: #ff87af; +} +.asciinema-terminal .bg-211 { + background-color: #ff87af; +} +.asciinema-terminal .fg-212 { + color: #ff87d7; +} +.asciinema-terminal .bg-212 { + background-color: #ff87d7; +} +.asciinema-terminal .fg-213 { + color: #ff87ff; +} +.asciinema-terminal .bg-213 { + background-color: #ff87ff; +} +.asciinema-terminal .fg-214 { + color: #ffaf00; +} +.asciinema-terminal .bg-214 { + background-color: #ffaf00; +} +.asciinema-terminal .fg-215 { + color: #ffaf5f; +} +.asciinema-terminal .bg-215 { + background-color: #ffaf5f; +} +.asciinema-terminal .fg-216 { + color: #ffaf87; +} +.asciinema-terminal .bg-216 { + background-color: #ffaf87; +} +.asciinema-terminal .fg-217 { + color: #ffafaf; +} +.asciinema-terminal .bg-217 { + background-color: #ffafaf; +} +.asciinema-terminal .fg-218 { + color: #ffafd7; +} +.asciinema-terminal .bg-218 { + background-color: #ffafd7; +} +.asciinema-terminal .fg-219 { + color: #ffafff; +} +.asciinema-terminal .bg-219 { + background-color: #ffafff; +} 
+.asciinema-terminal .fg-220 { + color: #ffd700; +} +.asciinema-terminal .bg-220 { + background-color: #ffd700; +} +.asciinema-terminal .fg-221 { + color: #ffd75f; +} +.asciinema-terminal .bg-221 { + background-color: #ffd75f; +} +.asciinema-terminal .fg-222 { + color: #ffd787; +} +.asciinema-terminal .bg-222 { + background-color: #ffd787; +} +.asciinema-terminal .fg-223 { + color: #ffd7af; +} +.asciinema-terminal .bg-223 { + background-color: #ffd7af; +} +.asciinema-terminal .fg-224 { + color: #ffd7d7; +} +.asciinema-terminal .bg-224 { + background-color: #ffd7d7; +} +.asciinema-terminal .fg-225 { + color: #ffd7ff; +} +.asciinema-terminal .bg-225 { + background-color: #ffd7ff; +} +.asciinema-terminal .fg-226 { + color: #ffff00; +} +.asciinema-terminal .bg-226 { + background-color: #ffff00; +} +.asciinema-terminal .fg-227 { + color: #ffff5f; +} +.asciinema-terminal .bg-227 { + background-color: #ffff5f; +} +.asciinema-terminal .fg-228 { + color: #ffff87; +} +.asciinema-terminal .bg-228 { + background-color: #ffff87; +} +.asciinema-terminal .fg-229 { + color: #ffffaf; +} +.asciinema-terminal .bg-229 { + background-color: #ffffaf; +} +.asciinema-terminal .fg-230 { + color: #ffffd7; +} +.asciinema-terminal .bg-230 { + background-color: #ffffd7; +} +.asciinema-terminal .fg-231 { + color: #ffffff; +} +.asciinema-terminal .bg-231 { + background-color: #ffffff; +} +.asciinema-terminal .fg-232 { + color: #080808; +} +.asciinema-terminal .bg-232 { + background-color: #080808; +} +.asciinema-terminal .fg-233 { + color: #121212; +} +.asciinema-terminal .bg-233 { + background-color: #121212; +} +.asciinema-terminal .fg-234 { + color: #1c1c1c; +} +.asciinema-terminal .bg-234 { + background-color: #1c1c1c; +} +.asciinema-terminal .fg-235 { + color: #262626; +} +.asciinema-terminal .bg-235 { + background-color: #262626; +} +.asciinema-terminal .fg-236 { + color: #303030; +} +.asciinema-terminal .bg-236 { + background-color: #303030; +} +.asciinema-terminal .fg-237 { + color: 
#3a3a3a; +} +.asciinema-terminal .bg-237 { + background-color: #3a3a3a; +} +.asciinema-terminal .fg-238 { + color: #444444; +} +.asciinema-terminal .bg-238 { + background-color: #444444; +} +.asciinema-terminal .fg-239 { + color: #4e4e4e; +} +.asciinema-terminal .bg-239 { + background-color: #4e4e4e; +} +.asciinema-terminal .fg-240 { + color: #585858; +} +.asciinema-terminal .bg-240 { + background-color: #585858; +} +.asciinema-terminal .fg-241 { + color: #626262; +} +.asciinema-terminal .bg-241 { + background-color: #626262; +} +.asciinema-terminal .fg-242 { + color: #6c6c6c; +} +.asciinema-terminal .bg-242 { + background-color: #6c6c6c; +} +.asciinema-terminal .fg-243 { + color: #767676; +} +.asciinema-terminal .bg-243 { + background-color: #767676; +} +.asciinema-terminal .fg-244 { + color: #808080; +} +.asciinema-terminal .bg-244 { + background-color: #808080; +} +.asciinema-terminal .fg-245 { + color: #8a8a8a; +} +.asciinema-terminal .bg-245 { + background-color: #8a8a8a; +} +.asciinema-terminal .fg-246 { + color: #949494; +} +.asciinema-terminal .bg-246 { + background-color: #949494; +} +.asciinema-terminal .fg-247 { + color: #9e9e9e; +} +.asciinema-terminal .bg-247 { + background-color: #9e9e9e; +} +.asciinema-terminal .fg-248 { + color: #a8a8a8; +} +.asciinema-terminal .bg-248 { + background-color: #a8a8a8; +} +.asciinema-terminal .fg-249 { + color: #b2b2b2; +} +.asciinema-terminal .bg-249 { + background-color: #b2b2b2; +} +.asciinema-terminal .fg-250 { + color: #bcbcbc; +} +.asciinema-terminal .bg-250 { + background-color: #bcbcbc; +} +.asciinema-terminal .fg-251 { + color: #c6c6c6; +} +.asciinema-terminal .bg-251 { + background-color: #c6c6c6; +} +.asciinema-terminal .fg-252 { + color: #d0d0d0; +} +.asciinema-terminal .bg-252 { + background-color: #d0d0d0; +} +.asciinema-terminal .fg-253 { + color: #dadada; +} +.asciinema-terminal .bg-253 { + background-color: #dadada; +} +.asciinema-terminal .fg-254 { + color: #e4e4e4; +} +.asciinema-terminal .bg-254 { + 
background-color: #e4e4e4; +} +.asciinema-terminal .fg-255 { + color: #eeeeee; +} +.asciinema-terminal .bg-255 { + background-color: #eeeeee; +} +.asciinema-theme-asciinema .asciinema-terminal { + color: #cccccc; + background-color: #121314; + border-color: #121314; +} +.asciinema-theme-asciinema .fg-bg { + color: #121314; +} +.asciinema-theme-asciinema .bg-fg { + background-color: #cccccc; +} +.asciinema-theme-asciinema .fg-0 { + color: #000000; +} +.asciinema-theme-asciinema .bg-0 { + background-color: #000000; +} +.asciinema-theme-asciinema .fg-1 { + color: #dd3c69; +} +.asciinema-theme-asciinema .bg-1 { + background-color: #dd3c69; +} +.asciinema-theme-asciinema .fg-2 { + color: #4ebf22; +} +.asciinema-theme-asciinema .bg-2 { + background-color: #4ebf22; +} +.asciinema-theme-asciinema .fg-3 { + color: #ddaf3c; +} +.asciinema-theme-asciinema .bg-3 { + background-color: #ddaf3c; +} +.asciinema-theme-asciinema .fg-4 { + color: #26b0d7; +} +.asciinema-theme-asciinema .bg-4 { + background-color: #26b0d7; +} +.asciinema-theme-asciinema .fg-5 { + color: #b954e1; +} +.asciinema-theme-asciinema .bg-5 { + background-color: #b954e1; +} +.asciinema-theme-asciinema .fg-6 { + color: #54e1b9; +} +.asciinema-theme-asciinema .bg-6 { + background-color: #54e1b9; +} +.asciinema-theme-asciinema .fg-7 { + color: #d9d9d9; +} +.asciinema-theme-asciinema .bg-7 { + background-color: #d9d9d9; +} +.asciinema-theme-asciinema .fg-8 { + color: #4d4d4d; +} +.asciinema-theme-asciinema .bg-8 { + background-color: #4d4d4d; +} +.asciinema-theme-asciinema .fg-9 { + color: #dd3c69; +} +.asciinema-theme-asciinema .bg-9 { + background-color: #dd3c69; +} +.asciinema-theme-asciinema .fg-10 { + color: #4ebf22; +} +.asciinema-theme-asciinema .bg-10 { + background-color: #4ebf22; +} +.asciinema-theme-asciinema .fg-11 { + color: #ddaf3c; +} +.asciinema-theme-asciinema .bg-11 { + background-color: #ddaf3c; +} +.asciinema-theme-asciinema .fg-12 { + color: #26b0d7; +} +.asciinema-theme-asciinema .bg-12 { + 
background-color: #26b0d7; +} +.asciinema-theme-asciinema .fg-13 { + color: #b954e1; +} +.asciinema-theme-asciinema .bg-13 { + background-color: #b954e1; +} +.asciinema-theme-asciinema .fg-14 { + color: #54e1b9; +} +.asciinema-theme-asciinema .bg-14 { + background-color: #54e1b9; +} +.asciinema-theme-asciinema .fg-15 { + color: #ffffff; +} +.asciinema-theme-asciinema .bg-15 { + background-color: #ffffff; +} +.asciinema-theme-asciinema .fg-8, +.asciinema-theme-asciinema .fg-9, +.asciinema-theme-asciinema .fg-10, +.asciinema-theme-asciinema .fg-11, +.asciinema-theme-asciinema .fg-12, +.asciinema-theme-asciinema .fg-13, +.asciinema-theme-asciinema .fg-14, +.asciinema-theme-asciinema .fg-15 { + font-weight: bold; +} +.asciinema-theme-tango .asciinema-terminal { + color: #cccccc; + background-color: #121314; + border-color: #121314; +} +.asciinema-theme-tango .fg-bg { + color: #121314; +} +.asciinema-theme-tango .bg-fg { + background-color: #cccccc; +} +.asciinema-theme-tango .fg-0 { + color: #000000; +} +.asciinema-theme-tango .bg-0 { + background-color: #000000; +} +.asciinema-theme-tango .fg-1 { + color: #cc0000; +} +.asciinema-theme-tango .bg-1 { + background-color: #cc0000; +} +.asciinema-theme-tango .fg-2 { + color: #4e9a06; +} +.asciinema-theme-tango .bg-2 { + background-color: #4e9a06; +} +.asciinema-theme-tango .fg-3 { + color: #c4a000; +} +.asciinema-theme-tango .bg-3 { + background-color: #c4a000; +} +.asciinema-theme-tango .fg-4 { + color: #3465a4; +} +.asciinema-theme-tango .bg-4 { + background-color: #3465a4; +} +.asciinema-theme-tango .fg-5 { + color: #75507b; +} +.asciinema-theme-tango .bg-5 { + background-color: #75507b; +} +.asciinema-theme-tango .fg-6 { + color: #06989a; +} +.asciinema-theme-tango .bg-6 { + background-color: #06989a; +} +.asciinema-theme-tango .fg-7 { + color: #d3d7cf; +} +.asciinema-theme-tango .bg-7 { + background-color: #d3d7cf; +} +.asciinema-theme-tango .fg-8 { + color: #555753; +} +.asciinema-theme-tango .bg-8 { + 
background-color: #555753; +} +.asciinema-theme-tango .fg-9 { + color: #ef2929; +} +.asciinema-theme-tango .bg-9 { + background-color: #ef2929; +} +.asciinema-theme-tango .fg-10 { + color: #8ae234; +} +.asciinema-theme-tango .bg-10 { + background-color: #8ae234; +} +.asciinema-theme-tango .fg-11 { + color: #fce94f; +} +.asciinema-theme-tango .bg-11 { + background-color: #fce94f; +} +.asciinema-theme-tango .fg-12 { + color: #729fcf; +} +.asciinema-theme-tango .bg-12 { + background-color: #729fcf; +} +.asciinema-theme-tango .fg-13 { + color: #ad7fa8; +} +.asciinema-theme-tango .bg-13 { + background-color: #ad7fa8; +} +.asciinema-theme-tango .fg-14 { + color: #34e2e2; +} +.asciinema-theme-tango .bg-14 { + background-color: #34e2e2; +} +.asciinema-theme-tango .fg-15 { + color: #eeeeec; +} +.asciinema-theme-tango .bg-15 { + background-color: #eeeeec; +} +.asciinema-theme-tango .fg-8, +.asciinema-theme-tango .fg-9, +.asciinema-theme-tango .fg-10, +.asciinema-theme-tango .fg-11, +.asciinema-theme-tango .fg-12, +.asciinema-theme-tango .fg-13, +.asciinema-theme-tango .fg-14, +.asciinema-theme-tango .fg-15 { + font-weight: bold; +} +.asciinema-theme-solarized-dark .asciinema-terminal { + color: #839496; + background-color: #002b36; + border-color: #002b36; +} +.asciinema-theme-solarized-dark .fg-bg { + color: #002b36; +} +.asciinema-theme-solarized-dark .bg-fg { + background-color: #839496; +} +.asciinema-theme-solarized-dark .fg-0 { + color: #073642; +} +.asciinema-theme-solarized-dark .bg-0 { + background-color: #073642; +} +.asciinema-theme-solarized-dark .fg-1 { + color: #dc322f; +} +.asciinema-theme-solarized-dark .bg-1 { + background-color: #dc322f; +} +.asciinema-theme-solarized-dark .fg-2 { + color: #859900; +} +.asciinema-theme-solarized-dark .bg-2 { + background-color: #859900; +} +.asciinema-theme-solarized-dark .fg-3 { + color: #b58900; +} +.asciinema-theme-solarized-dark .bg-3 { + background-color: #b58900; +} +.asciinema-theme-solarized-dark .fg-4 { + color: 
#268bd2; +} +.asciinema-theme-solarized-dark .bg-4 { + background-color: #268bd2; +} +.asciinema-theme-solarized-dark .fg-5 { + color: #d33682; +} +.asciinema-theme-solarized-dark .bg-5 { + background-color: #d33682; +} +.asciinema-theme-solarized-dark .fg-6 { + color: #2aa198; +} +.asciinema-theme-solarized-dark .bg-6 { + background-color: #2aa198; +} +.asciinema-theme-solarized-dark .fg-7 { + color: #eee8d5; +} +.asciinema-theme-solarized-dark .bg-7 { + background-color: #eee8d5; +} +.asciinema-theme-solarized-dark .fg-8 { + color: #002b36; +} +.asciinema-theme-solarized-dark .bg-8 { + background-color: #002b36; +} +.asciinema-theme-solarized-dark .fg-9 { + color: #cb4b16; +} +.asciinema-theme-solarized-dark .bg-9 { + background-color: #cb4b16; +} +.asciinema-theme-solarized-dark .fg-10 { + color: #586e75; +} +.asciinema-theme-solarized-dark .bg-10 { + background-color: #586e75; +} +.asciinema-theme-solarized-dark .fg-11 { + color: #657b83; +} +.asciinema-theme-solarized-dark .bg-11 { + background-color: #657b83; +} +.asciinema-theme-solarized-dark .fg-12 { + color: #839496; +} +.asciinema-theme-solarized-dark .bg-12 { + background-color: #839496; +} +.asciinema-theme-solarized-dark .fg-13 { + color: #6c71c4; +} +.asciinema-theme-solarized-dark .bg-13 { + background-color: #6c71c4; +} +.asciinema-theme-solarized-dark .fg-14 { + color: #93a1a1; +} +.asciinema-theme-solarized-dark .bg-14 { + background-color: #93a1a1; +} +.asciinema-theme-solarized-dark .fg-15 { + color: #fdf6e3; +} +.asciinema-theme-solarized-dark .bg-15 { + background-color: #fdf6e3; +} +.asciinema-theme-solarized-light .asciinema-terminal { + color: #657b83; + background-color: #fdf6e3; + border-color: #fdf6e3; +} +.asciinema-theme-solarized-light .fg-bg { + color: #fdf6e3; +} +.asciinema-theme-solarized-light .bg-fg { + background-color: #657b83; +} +.asciinema-theme-solarized-light .fg-0 { + color: #073642; +} +.asciinema-theme-solarized-light .bg-0 { + background-color: #073642; +} 
+.asciinema-theme-solarized-light .fg-1 {
+  color: #dc322f;
+}
+.asciinema-theme-solarized-light .bg-1 {
+  background-color: #dc322f;
+}
+.asciinema-theme-solarized-light .fg-2 {
+  color: #859900;
+}
+.asciinema-theme-solarized-light .bg-2 {
+  background-color: #859900;
+}
+.asciinema-theme-solarized-light .fg-3 {
+  color: #b58900;
+}
+.asciinema-theme-solarized-light .bg-3 {
+  background-color: #b58900;
+}
+.asciinema-theme-solarized-light .fg-4 {
+  color: #268bd2;
+}
+.asciinema-theme-solarized-light .bg-4 {
+  background-color: #268bd2;
+}
+.asciinema-theme-solarized-light .fg-5 {
+  color: #d33682;
+}
+.asciinema-theme-solarized-light .bg-5 {
+  background-color: #d33682;
+}
+.asciinema-theme-solarized-light .fg-6 {
+  color: #2aa198;
+}
+.asciinema-theme-solarized-light .bg-6 {
+  background-color: #2aa198;
+}
+.asciinema-theme-solarized-light .fg-7 {
+  color: #eee8d5;
+}
+.asciinema-theme-solarized-light .bg-7 {
+  background-color: #eee8d5;
+}
+.asciinema-theme-solarized-light .fg-8 {
+  color: #002b36;
+}
+.asciinema-theme-solarized-light .bg-8 {
+  background-color: #002b36;
+}
+.asciinema-theme-solarized-light .fg-9 {
+  color: #cb4b16;
+}
+.asciinema-theme-solarized-light .bg-9 {
+  background-color: #cb4b16;
+}
+.asciinema-theme-solarized-light .fg-10 {
+  color: #586e75;
+}
+.asciinema-theme-solarized-light .bg-10 {
+  background-color: #586e75;
+}
+.asciinema-theme-solarized-light .fg-11 {
+  color: #657b83; /* Solarized base00; was #657c83 (typo). Same value as this theme's default foreground. */
+}
+.asciinema-theme-solarized-light .bg-11 {
+  background-color: #657b83; /* kept in sync with .fg-11 */
+}
+.asciinema-theme-solarized-light .fg-12 {
+  color: #839496;
+}
+.asciinema-theme-solarized-light .bg-12 {
+  background-color: #839496;
+}
+.asciinema-theme-solarized-light .fg-13 {
+  color: #6c71c4;
+}
+.asciinema-theme-solarized-light .bg-13 {
+  background-color: #6c71c4;
+}
+.asciinema-theme-solarized-light .fg-14 {
+  color: #93a1a1;
+}
+.asciinema-theme-solarized-light .bg-14 {
+  background-color: #93a1a1;
+}
+.asciinema-theme-solarized-light .fg-15 {
+  color: #fdf6e3;
+}
+.asciinema-theme-solarized-light .bg-15 { + background-color: #fdf6e3; +} +.asciinema-theme-seti .asciinema-terminal { + color: #cacecd; + background-color: #111213; + border-color: #111213; +} +.asciinema-theme-seti .fg-bg { + color: #111213; +} +.asciinema-theme-seti .bg-fg { + background-color: #cacecd; +} +.asciinema-theme-seti .fg-0 { + color: #323232; +} +.asciinema-theme-seti .bg-0 { + background-color: #323232; +} +.asciinema-theme-seti .fg-1 { + color: #c22832; +} +.asciinema-theme-seti .bg-1 { + background-color: #c22832; +} +.asciinema-theme-seti .fg-2 { + color: #8ec43d; +} +.asciinema-theme-seti .bg-2 { + background-color: #8ec43d; +} +.asciinema-theme-seti .fg-3 { + color: #e0c64f; +} +.asciinema-theme-seti .bg-3 { + background-color: #e0c64f; +} +.asciinema-theme-seti .fg-4 { + color: #43a5d5; +} +.asciinema-theme-seti .bg-4 { + background-color: #43a5d5; +} +.asciinema-theme-seti .fg-5 { + color: #8b57b5; +} +.asciinema-theme-seti .bg-5 { + background-color: #8b57b5; +} +.asciinema-theme-seti .fg-6 { + color: #8ec43d; +} +.asciinema-theme-seti .bg-6 { + background-color: #8ec43d; +} +.asciinema-theme-seti .fg-7 { + color: #eeeeee; +} +.asciinema-theme-seti .bg-7 { + background-color: #eeeeee; +} +.asciinema-theme-seti .fg-8 { + color: #323232; +} +.asciinema-theme-seti .bg-8 { + background-color: #323232; +} +.asciinema-theme-seti .fg-9 { + color: #c22832; +} +.asciinema-theme-seti .bg-9 { + background-color: #c22832; +} +.asciinema-theme-seti .fg-10 { + color: #8ec43d; +} +.asciinema-theme-seti .bg-10 { + background-color: #8ec43d; +} +.asciinema-theme-seti .fg-11 { + color: #e0c64f; +} +.asciinema-theme-seti .bg-11 { + background-color: #e0c64f; +} +.asciinema-theme-seti .fg-12 { + color: #43a5d5; +} +.asciinema-theme-seti .bg-12 { + background-color: #43a5d5; +} +.asciinema-theme-seti .fg-13 { + color: #8b57b5; +} +.asciinema-theme-seti .bg-13 { + background-color: #8b57b5; +} +.asciinema-theme-seti .fg-14 { + color: #8ec43d; +} 
+.asciinema-theme-seti .bg-14 { + background-color: #8ec43d; +} +.asciinema-theme-seti .fg-15 { + color: #ffffff; +} +.asciinema-theme-seti .bg-15 { + background-color: #ffffff; +} +.asciinema-theme-seti .fg-8, +.asciinema-theme-seti .fg-9, +.asciinema-theme-seti .fg-10, +.asciinema-theme-seti .fg-11, +.asciinema-theme-seti .fg-12, +.asciinema-theme-seti .fg-13, +.asciinema-theme-seti .fg-14, +.asciinema-theme-seti .fg-15 { + font-weight: bold; +} +/* Based on Monokai from base16 collection - https://github.com/chriskempson/base16 */ +.asciinema-theme-monokai .asciinema-terminal { + color: #f8f8f2; + background-color: #272822; + border-color: #272822; +} +.asciinema-theme-monokai .fg-bg { + color: #272822; +} +.asciinema-theme-monokai .bg-fg { + background-color: #f8f8f2; +} +.asciinema-theme-monokai .fg-0 { + color: #272822; +} +.asciinema-theme-monokai .bg-0 { + background-color: #272822; +} +.asciinema-theme-monokai .fg-1 { + color: #f92672; +} +.asciinema-theme-monokai .bg-1 { + background-color: #f92672; +} +.asciinema-theme-monokai .fg-2 { + color: #a6e22e; +} +.asciinema-theme-monokai .bg-2 { + background-color: #a6e22e; +} +.asciinema-theme-monokai .fg-3 { + color: #f4bf75; +} +.asciinema-theme-monokai .bg-3 { + background-color: #f4bf75; +} +.asciinema-theme-monokai .fg-4 { + color: #66d9ef; +} +.asciinema-theme-monokai .bg-4 { + background-color: #66d9ef; +} +.asciinema-theme-monokai .fg-5 { + color: #ae81ff; +} +.asciinema-theme-monokai .bg-5 { + background-color: #ae81ff; +} +.asciinema-theme-monokai .fg-6 { + color: #a1efe4; +} +.asciinema-theme-monokai .bg-6 { + background-color: #a1efe4; +} +.asciinema-theme-monokai .fg-7 { + color: #f8f8f2; +} +.asciinema-theme-monokai .bg-7 { + background-color: #f8f8f2; +} +.asciinema-theme-monokai .fg-8 { + color: #75715e; +} +.asciinema-theme-monokai .bg-8 { + background-color: #75715e; +} +.asciinema-theme-monokai .fg-9 { + color: #f92672; +} +.asciinema-theme-monokai .bg-9 { + background-color: #f92672; +} 
+.asciinema-theme-monokai .fg-10 { + color: #a6e22e; +} +.asciinema-theme-monokai .bg-10 { + background-color: #a6e22e; +} +.asciinema-theme-monokai .fg-11 { + color: #f4bf75; +} +.asciinema-theme-monokai .bg-11 { + background-color: #f4bf75; +} +.asciinema-theme-monokai .fg-12 { + color: #66d9ef; +} +.asciinema-theme-monokai .bg-12 { + background-color: #66d9ef; +} +.asciinema-theme-monokai .fg-13 { + color: #ae81ff; +} +.asciinema-theme-monokai .bg-13 { + background-color: #ae81ff; +} +.asciinema-theme-monokai .fg-14 { + color: #a1efe4; +} +.asciinema-theme-monokai .bg-14 { + background-color: #a1efe4; +} +.asciinema-theme-monokai .fg-15 { + color: #f9f8f5; +} +.asciinema-theme-monokai .bg-15 { + background-color: #f9f8f5; +} +.asciinema-theme-monokai .fg-8, +.asciinema-theme-monokai .fg-9, +.asciinema-theme-monokai .fg-10, +.asciinema-theme-monokai .fg-11, +.asciinema-theme-monokai .fg-12, +.asciinema-theme-monokai .fg-13, +.asciinema-theme-monokai .fg-14, +.asciinema-theme-monokai .fg-15 { + font-weight: bold; +} diff --git a/Documentation/sphinx-static/asciinema-player.js b/Documentation/sphinx-static/asciinema-player.js new file mode 100644 index 00000000000000..5ad47e08b98f87 --- /dev/null +++ b/Documentation/sphinx-static/asciinema-player.js @@ -0,0 +1,1213 @@ +/** + * asciinema-player v2.6.1 + * + * Copyright 2011-2018, Marcin Kulik + * + */ + +// CustomEvent polyfill from MDN (https://developer.mozilla.org/en-US/docs/Web/API/CustomEvent/CustomEvent) + +(function () { + if (typeof window.CustomEvent === "function") return false; + + function CustomEvent ( event, params ) { + params = params || { bubbles: false, cancelable: false, detail: undefined }; + var evt = document.createEvent( 'CustomEvent'); + evt.initCustomEvent(event, params.bubbles, params.cancelable, params.detail); + return evt; + } + + CustomEvent.prototype = window.Event.prototype; + + window.CustomEvent = CustomEvent; +})(); + +/** + * @license + * Copyright (c) 2014 The Polymer Project 
Authors. All rights reserved. + * This code may only be used under the BSD style license found at http://polymer.github.io/LICENSE.txt + * The complete set of authors may be found at http://polymer.github.io/AUTHORS.txt + * The complete set of contributors may be found at http://polymer.github.io/CONTRIBUTORS.txt + * Code distributed by Google as part of the polymer project is also + * subject to an additional IP rights grant found at http://polymer.github.io/PATENTS.txt + */ +// @version 0.7.22 +"undefined"==typeof WeakMap&&!function(){var e=Object.defineProperty,t=Date.now()%1e9,n=function(){this.name="__st"+(1e9*Math.random()>>>0)+(t++ +"__")};n.prototype={set:function(t,n){var o=t[this.name];return o&&o[0]===t?o[1]=n:e(t,this.name,{value:[t,n],writable:!0}),this},get:function(e){var t;return(t=e[this.name])&&t[0]===e?t[1]:void 0},"delete":function(e){var t=e[this.name];return t&&t[0]===e?(t[0]=t[1]=void 0,!0):!1},has:function(e){var t=e[this.name];return t?t[0]===e:!1}},window.WeakMap=n}(),function(e){function t(e){E.push(e),b||(b=!0,w(o))}function n(e){return window.ShadowDOMPolyfill&&window.ShadowDOMPolyfill.wrapIfNeeded(e)||e}function o(){b=!1;var e=E;E=[],e.sort(function(e,t){return e.uid_-t.uid_});var t=!1;e.forEach(function(e){var n=e.takeRecords();r(e),n.length&&(e.callback_(n,e),t=!0)}),t&&o()}function r(e){e.nodes_.forEach(function(t){var n=v.get(t);n&&n.forEach(function(t){t.observer===e&&t.removeTransientObservers()})})}function i(e,t){for(var n=e;n;n=n.parentNode){var o=v.get(n);if(o)for(var r=0;r0){var r=n[o-1],i=p(r,e);if(i)return void(n[o-1]=i)}else t(this.observer);n[o]=e},addListeners:function(){this.addListeners_(this.target)},addListeners_:function(e){var 
t=this.options;t.attributes&&e.addEventListener("DOMAttrModified",this,!0),t.characterData&&e.addEventListener("DOMCharacterDataModified",this,!0),t.childList&&e.addEventListener("DOMNodeInserted",this,!0),(t.childList||t.subtree)&&e.addEventListener("DOMNodeRemoved",this,!0)},removeListeners:function(){this.removeListeners_(this.target)},removeListeners_:function(e){var t=this.options;t.attributes&&e.removeEventListener("DOMAttrModified",this,!0),t.characterData&&e.removeEventListener("DOMCharacterDataModified",this,!0),t.childList&&e.removeEventListener("DOMNodeInserted",this,!0),(t.childList||t.subtree)&&e.removeEventListener("DOMNodeRemoved",this,!0)},addTransientObserver:function(e){if(e!==this.target){this.addListeners_(e),this.transientObservedNodes.push(e);var t=v.get(e);t||v.set(e,t=[]),t.push(this)}},removeTransientObservers:function(){var e=this.transientObservedNodes;this.transientObservedNodes=[],e.forEach(function(e){this.removeListeners_(e);for(var t=v.get(e),n=0;n=0)){n.push(e);for(var o,r=e.querySelectorAll("link[rel="+a+"]"),d=0,s=r.length;s>d&&(o=r[d]);d++)o["import"]&&i(o["import"],t,n);t(e)}}var a=window.HTMLImports?window.HTMLImports.IMPORT_LINK_TYPE:"none";e.forDocumentTree=r,e.forSubtree=t}),window.CustomElements.addModule(function(e){function t(e,t){return n(e,t)||o(e,t)}function n(t,n){return e.upgrade(t,n)?!0:void(n&&a(t))}function o(e,t){b(e,function(e){return n(e,t)?!0:void 0})}function r(e){N.push(e),y||(y=!0,setTimeout(i))}function i(){y=!1;for(var e,t=N,n=0,o=t.length;o>n&&(e=t[n]);n++)e();N=[]}function a(e){_?r(function(){d(e)}):d(e)}function d(e){e.__upgraded__&&!e.__attached&&(e.__attached=!0,e.attachedCallback&&e.attachedCallback())}function s(e){u(e),b(e,function(e){u(e)})}function u(e){_?r(function(){c(e)}):c(e)}function c(e){e.__upgraded__&&e.__attached&&(e.__attached=!1,e.detachedCallback&&e.detachedCallback())}function l(e){for(var 
t=e,n=window.wrap(document);t;){if(t==n)return!0;t=t.parentNode||t.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&t.host}}function f(e){if(e.shadowRoot&&!e.shadowRoot.__watched){g.dom&&console.log("watching shadow-root for: ",e.localName);for(var t=e.shadowRoot;t;)w(t),t=t.olderShadowRoot}}function p(e,n){if(g.dom){var o=n[0];if(o&&"childList"===o.type&&o.addedNodes&&o.addedNodes){for(var r=o.addedNodes[0];r&&r!==document&&!r.host;)r=r.parentNode;var i=r&&(r.URL||r._URL||r.host&&r.host.localName)||"";i=i.split("/?").shift().split("/").pop()}console.group("mutations (%d) [%s]",n.length,i||"")}var a=l(e);n.forEach(function(e){"childList"===e.type&&(M(e.addedNodes,function(e){e.localName&&t(e,a)}),M(e.removedNodes,function(e){e.localName&&s(e)}))}),g.dom&&console.groupEnd()}function m(e){for(e=window.wrap(e),e||(e=window.wrap(document));e.parentNode;)e=e.parentNode;var t=e.__observer;t&&(p(e,t.takeRecords()),i())}function w(e){if(!e.__observer){var t=new MutationObserver(p.bind(this,e));t.observe(e,{childList:!0,subtree:!0}),e.__observer=t}}function v(e){e=window.wrap(e),g.dom&&console.group("upgradeDocument: ",e.baseURI.split("/").pop());var n=e===window.wrap(document);t(e,n),w(e),g.dom&&console.groupEnd()}function h(e){E(e,v)}var g=e.flags,b=e.forSubtree,E=e.forDocumentTree,_=window.MutationObserver._isPolyfilled&&g["throttle-attached"];e.hasPolyfillMutations=_,e.hasThrottledAttached=_;var y=!1,N=[],M=Array.prototype.forEach.call.bind(Array.prototype.forEach),O=Element.prototype.createShadowRoot;O&&(Element.prototype.createShadowRoot=function(){var e=O.call(this);return window.CustomElements.watchShadow(this),e}),e.watchShadow=f,e.upgradeDocumentTree=h,e.upgradeDocument=v,e.upgradeSubtree=o,e.upgradeAll=t,e.attached=a,e.takeRecords=m}),window.CustomElements.addModule(function(e){function t(t,o){if("template"===t.localName&&window.HTMLTemplateElement&&HTMLTemplateElement.decorate&&HTMLTemplateElement.decorate(t),!t.__upgraded__&&t.nodeType===Node.ELEMENT_NODE){var 
r=t.getAttribute("is"),i=e.getRegisteredDefinition(t.localName)||e.getRegisteredDefinition(r);if(i&&(r&&i.tag==t.localName||!r&&!i["extends"]))return n(t,i,o)}}function n(t,n,r){return a.upgrade&&console.group("upgrade:",t.localName),n.is&&t.setAttribute("is",n.is),o(t,n),t.__upgraded__=!0,i(t),r&&e.attached(t),e.upgradeSubtree(t,r),a.upgrade&&console.groupEnd(),t}function o(e,t){Object.__proto__?e.__proto__=t.prototype:(r(e,t.prototype,t["native"]),e.__proto__=t.prototype)}function r(e,t,n){for(var o={},r=t;r!==n&&r!==HTMLElement.prototype;){for(var i,a=Object.getOwnPropertyNames(r),d=0;i=a[d];d++)o[i]||(Object.defineProperty(e,i,Object.getOwnPropertyDescriptor(r,i)),o[i]=1);r=Object.getPrototypeOf(r)}}function i(e){e.createdCallback&&e.createdCallback()}var a=e.flags;e.upgrade=t,e.upgradeWithDefinition=n,e.implementPrototype=o}),window.CustomElements.addModule(function(e){function t(t,o){var s=o||{};if(!t)throw new Error("document.registerElement: first argument `name` must not be empty");if(t.indexOf("-")<0)throw new Error("document.registerElement: first argument ('name') must contain a dash ('-'). Argument provided was '"+String(t)+"'.");if(r(t))throw new Error("Failed to execute 'registerElement' on 'Document': Registration failed for type '"+String(t)+"'. 
The type name is invalid.");if(u(t))throw new Error("DuplicateDefinitionError: a type with name '"+String(t)+"' is already registered");return s.prototype||(s.prototype=Object.create(HTMLElement.prototype)),s.__name=t.toLowerCase(),s["extends"]&&(s["extends"]=s["extends"].toLowerCase()),s.lifecycle=s.lifecycle||{},s.ancestry=i(s["extends"]),a(s),d(s),n(s.prototype),c(s.__name,s),s.ctor=l(s),s.ctor.prototype=s.prototype,s.prototype.constructor=s.ctor,e.ready&&v(document),s.ctor}function n(e){if(!e.setAttribute._polyfilled){var t=e.setAttribute;e.setAttribute=function(e,n){o.call(this,e,n,t)};var n=e.removeAttribute;e.removeAttribute=function(e){o.call(this,e,null,n)},e.setAttribute._polyfilled=!0}}function o(e,t,n){e=e.toLowerCase();var o=this.getAttribute(e);n.apply(this,arguments);var r=this.getAttribute(e);this.attributeChangedCallback&&r!==o&&this.attributeChangedCallback(e,o,r)}function r(e){for(var t=0;t<_.length;t++)if(e===_[t])return!0}function i(e){var t=u(e);return t?i(t["extends"]).concat([t]):[]}function a(e){for(var t,n=e["extends"],o=0;t=e.ancestry[o];o++)n=t.is&&t.tag;e.tag=n||e.__name,n&&(e.is=e.__name)}function d(e){if(!Object.__proto__){var t=HTMLElement.prototype;if(e.is){var n=document.createElement(e.tag);t=Object.getPrototypeOf(n)}for(var o,r=e.prototype,i=!1;r;)r==t&&(i=!0),o=Object.getPrototypeOf(r),o&&(r.__proto__=o),r=o;i||console.warn(e.tag+" prototype not found in prototype chain for "+e.is),e["native"]=t}}function s(e){return g(M(e.tag),e)}function u(e){return e?y[e.toLowerCase()]:void 0}function c(e,t){y[e]=t}function l(e){return function(){return s(e)}}function f(e,t,n){return e===N?p(t,n):O(e,t)}function p(e,t){e&&(e=e.toLowerCase()),t&&(t=t.toLowerCase());var n=u(t||e);if(n){if(e==n.tag&&t==n.is)return new n.ctor;if(!t&&!n.is)return new n.ctor}var o;return t?(o=p(e),o.setAttribute("is",t),o):(o=M(e),e.indexOf("-")>=0&&b(o,HTMLElement),o)}function m(e,t){var n=e[t];e[t]=function(){var e=n.apply(this,arguments);return h(e),e}}var 
w,v=(e.isIE,e.upgradeDocumentTree),h=e.upgradeAll,g=e.upgradeWithDefinition,b=e.implementPrototype,E=e.useNative,_=["annotation-xml","color-profile","font-face","font-face-src","font-face-uri","font-face-format","font-face-name","missing-glyph"],y={},N="http://www.w3.org/1999/xhtml",M=document.createElement.bind(document),O=document.createElementNS.bind(document);w=Object.__proto__||E?function(e,t){return e instanceof t}:function(e,t){if(e instanceof t)return!0;for(var n=e;n;){if(n===t.prototype)return!0;n=n.__proto__}return!1},m(Node.prototype,"cloneNode"),m(document,"importNode"),document.registerElement=t,document.createElement=p,document.createElementNS=f,e.registry=y,e["instanceof"]=w,e.reservedTagList=_,e.getRegisteredDefinition=u,document.register=document.registerElement}),function(e){function t(){i(window.wrap(document)),window.CustomElements.ready=!0;var e=window.requestAnimationFrame||function(e){setTimeout(e,16)};e(function(){setTimeout(function(){window.CustomElements.readyTime=Date.now(),window.HTMLImports&&(window.CustomElements.elapsed=window.CustomElements.readyTime-window.HTMLImports.readyTime),document.dispatchEvent(new CustomEvent("WebComponentsReady",{bubbles:!0}))})})}var n=e.useNative,o=e.initializeModules;e.isIE;if(n){var r=function(){};e.watchShadow=r,e.upgrade=r,e.upgradeAll=r,e.upgradeDocumentTree=r,e.upgradeSubtree=r,e.takeRecords=r,e["instanceof"]=function(e,t){return e instanceof t}}else o();var i=e.upgradeDocumentTree,a=e.upgradeDocument;if(window.wrap||(window.ShadowDOMPolyfill?(window.wrap=window.ShadowDOMPolyfill.wrapIfNeeded,window.unwrap=window.ShadowDOMPolyfill.unwrapIfNeeded):window.wrap=window.unwrap=function(e){return e}),window.HTMLImports&&(window.HTMLImports.__importsParsingHook=function(e){e["import"]&&a(wrap(e["import"]))}),"complete"===document.readyState||e.flags.eager)t();else if("interactive"!==document.readyState||window.attachEvent||window.HTMLImports&&!window.HTMLImports.ready){var 
d=window.HTMLImports&&!window.HTMLImports.ready?"HTMLImportsLoaded":"DOMContentLoaded";window.addEventListener(d,t)}else t()}(window.CustomElements); +if(typeof Math.imul == "undefined" || (Math.imul(0xffffffff,5) == 0)) { + Math.imul = function (a, b) { + var ah = (a >>> 16) & 0xffff; + var al = a & 0xffff; + var bh = (b >>> 16) & 0xffff; + var bl = b & 0xffff; + // the shift by 0 fixes the sign on the high part + // the final |0 converts the unsigned value into a signed value + return ((al * bl) + (((ah * bl + al * bh) << 16) >>> 0)|0); + } +} + +/** + * React v15.5.4 + * + * Copyright 2013-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + * + */ +!function(t){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=t();else if("function"==typeof define&&define.amd)define([],t);else{var e;e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,e.React=t()}}(function(){return function t(e,n,r){function o(u,a){if(!n[u]){if(!e[u]){var s="function"==typeof require&&require;if(!a&&s)return s(u,!0);if(i)return i(u,!0);var c=new Error("Cannot find module '"+u+"'");throw c.code="MODULE_NOT_FOUND",c}var l=n[u]={exports:{}};e[u][0].call(l.exports,function(t){var n=e[u][1][t];return o(n||t)},l,l.exports,t,e,n,r)}return n[u].exports}for(var i="function"==typeof require&&require,u=0;u1){for(var y=Array(d),h=0;h1){for(var 
m=Array(v),b=0;b8&&C<=11),x=32,w=String.fromCharCode(x),T={beforeInput:{phasedRegistrationNames:{bubbled:"onBeforeInput",captured:"onBeforeInputCapture"},dependencies:["topCompositionEnd","topKeyPress","topTextInput","topPaste"]},compositionEnd:{phasedRegistrationNames:{bubbled:"onCompositionEnd",captured:"onCompositionEndCapture"},dependencies:["topBlur","topCompositionEnd","topKeyDown","topKeyPress","topKeyUp","topMouseDown"]},compositionStart:{phasedRegistrationNames:{bubbled:"onCompositionStart",captured:"onCompositionStartCapture"},dependencies:["topBlur","topCompositionStart","topKeyDown","topKeyPress","topKeyUp","topMouseDown"]},compositionUpdate:{phasedRegistrationNames:{bubbled:"onCompositionUpdate",captured:"onCompositionUpdateCapture"},dependencies:["topBlur","topCompositionUpdate","topKeyDown","topKeyPress","topKeyUp","topMouseDown"]}},k=!1,P=null,S={eventTypes:T,extractEvents:function(e,t,n,r){return[u(e,t,n,r),p(e,t,n,r)]}};t.exports=S},{123:123,19:19,20:20,78:78,82:82}],4:[function(e,t,n){"use strict";function r(e,t){return e+t.charAt(0).toUpperCase()+t.substring(1)}var o={animationIterationCount:!0,borderImageOutset:!0,borderImageSlice:!0,borderImageWidth:!0,boxFlex:!0,boxFlexGroup:!0,boxOrdinalGroup:!0,columnCount:!0,flex:!0,flexGrow:!0,flexPositive:!0,flexShrink:!0,flexNegative:!0,flexOrder:!0,gridRow:!0,gridColumn:!0,fontWeight:!0,lineClamp:!0,lineHeight:!0,opacity:!0,order:!0,orphans:!0,tabSize:!0,widows:!0,zIndex:!0,zoom:!0,fillOpacity:!0,floodOpacity:!0,stopOpacity:!0,strokeDasharray:!0,strokeDashoffset:!0,strokeMiterlimit:!0,strokeOpacity:!0,strokeWidth:!0},i=["Webkit","ms","Moz","O"];Object.keys(o).forEach(function(e){i.forEach(function(t){o[r(t,e)]=o[e]})});var 
a={background:{backgroundAttachment:!0,backgroundColor:!0,backgroundImage:!0,backgroundPositionX:!0,backgroundPositionY:!0,backgroundRepeat:!0},backgroundPosition:{backgroundPositionX:!0,backgroundPositionY:!0},border:{borderWidth:!0,borderStyle:!0,borderColor:!0},borderBottom:{borderBottomWidth:!0,borderBottomStyle:!0,borderBottomColor:!0},borderLeft:{borderLeftWidth:!0,borderLeftStyle:!0,borderLeftColor:!0},borderRight:{borderRightWidth:!0,borderRightStyle:!0,borderRightColor:!0},borderTop:{borderTopWidth:!0,borderTopStyle:!0,borderTopColor:!0},font:{fontStyle:!0,fontVariant:!0,fontWeight:!0,fontSize:!0,lineHeight:!0,fontFamily:!0},outline:{outlineWidth:!0,outlineStyle:!0,outlineColor:!0}},s={isUnitlessNumber:o,shorthandPropertyExpansions:a};t.exports=s},{}],5:[function(e,t,n){"use strict";var r=e(4),o=e(123),i=(e(58),e(125),e(94)),a=e(136),s=e(140),u=(e(142),s(function(e){return a(e)})),l=!1,c="cssFloat";if(o.canUseDOM){var p=document.createElement("div").style;try{p.font=""}catch(e){l=!0}void 0===document.documentElement.style.cssFloat&&(c="styleFloat")}var d={createMarkupForStyles:function(e,t){var n="";for(var r in e)if(e.hasOwnProperty(r)){var o=e[r];null!=o&&(n+=u(r)+":",n+=i(r,o,t)+";")}return n||null},setValueForStyles:function(e,t,n){var o=e.style;for(var a in t)if(t.hasOwnProperty(a)){var s=i(a,t[a],n);if("float"!==a&&"cssFloat"!==a||(a=c),s)o[a]=s;else{var u=l&&r.shorthandPropertyExpansions[a];if(u)for(var p in u)o[p]="";else o[a]=""}}}};t.exports=d},{123:123,125:125,136:136,140:140,142:142,4:4,58:58,94:94}],6:[function(e,t,n){"use strict";function r(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}var o=e(112),i=e(24),a=(e(137),function(){function e(t){r(this,e),this._callbacks=null,this._contexts=null,this._arg=t}return e.prototype.enqueue=function(e,t){this._callbacks=this._callbacks||[],this._callbacks.push(e),this._contexts=this._contexts||[],this._contexts.push(t)},e.prototype.notifyAll=function(){var 
e=this._callbacks,t=this._contexts,n=this._arg;if(e&&t){e.length!==t.length&&o("24"),this._callbacks=null,this._contexts=null;for(var r=0;r8));var A=!1;b.canUseDOM&&(A=k("input")&&(!document.documentMode||document.documentMode>11));var D={get:function(){return O.get.call(this)},set:function(e){I=""+e,O.set.call(this,e)}},L={eventTypes:S,extractEvents:function(e,t,n,o){var i,a,s=t?E.getNodeFromInstance(t):window;if(r(s)?R?i=u:a=l:P(s)?A?i=f:(i=m,a=h):v(s)&&(i=g),i){var c=i(e,t);if(c){var p=w.getPooled(S.change,c,n,o);return p.type="change",C.accumulateTwoPhaseDispatches(p),p}}a&&a(e,s,t),"topBlur"===e&&y(t,s)}};t.exports=L},{102:102,109:109,110:110,123:123,16:16,19:19,33:33,71:71,80:80}],8:[function(e,t,n){"use strict";function r(e,t){return Array.isArray(t)&&(t=t[1]),t?t.nextSibling:e.firstChild}function o(e,t,n){c.insertTreeBefore(e,t,n)}function i(e,t,n){Array.isArray(t)?s(e,t[0],t[1],n):m(e,t,n)}function a(e,t){if(Array.isArray(t)){var n=t[1];t=t[0],u(e,t,n),e.removeChild(n)}e.removeChild(t)}function s(e,t,n,r){for(var o=t;;){var i=o.nextSibling;if(m(e,o,r),o===n)break;o=i}}function u(e,t,n){for(;;){var r=t.nextSibling;if(r===n)break;e.removeChild(r)}}function l(e,t,n){var r=e.parentNode,o=e.nextSibling;o===t?n&&m(r,document.createTextNode(n),o):n?(h(o,n),u(r,o,t)):u(r,e,t)}var c=e(9),p=e(13),d=(e(33),e(58),e(93)),f=e(114),h=e(115),m=d(function(e,t,n){e.insertBefore(t,n)}),v=p.dangerouslyReplaceNodeWithMarkup,g={dangerouslyReplaceNodeWithMarkup:v,replaceDelimitedText:l,processUpdates:function(e,t){for(var n=0;n-1||a("96",e),!l.plugins[n]){t.extractEvents||a("97",e),l.plugins[n]=t;var r=t.eventTypes;for(var i in r)o(r[i],t,i)||a("98",i,e)}}}function o(e,t,n){l.eventNameDispatchConfigs.hasOwnProperty(n)&&a("99",n),l.eventNameDispatchConfigs[n]=e;var r=e.phasedRegistrationNames;if(r){for(var o in r)if(r.hasOwnProperty(o)){var s=r[o];i(s,t,n)}return!0}return!!e.registrationName&&(i(e.registrationName,t,n),!0)}function 
i(e,t,n){l.registrationNameModules[e]&&a("100",e),l.registrationNameModules[e]=t,l.registrationNameDependencies[e]=t.eventTypes[n].dependencies}var a=e(112),s=(e(137),null),u={},l={plugins:[],eventNameDispatchConfigs:{},registrationNameModules:{},registrationNameDependencies:{},possibleRegistrationNames:null,injectEventPluginOrder:function(e){s&&a("101"),s=Array.prototype.slice.call(e),r()},injectEventPluginsByName:function(e){var t=!1;for(var n in e)if(e.hasOwnProperty(n)){var o=e[n];u.hasOwnProperty(n)&&u[n]===o||(u[n]&&a("102",n),u[n]=o,t=!0)}t&&r()},getPluginModuleForEvent:function(e){var t=e.dispatchConfig;if(t.registrationName)return l.registrationNameModules[t.registrationName]||null;if(void 0!==t.phasedRegistrationNames){var n=t.phasedRegistrationNames;for(var r in n)if(n.hasOwnProperty(r)){var o=l.registrationNameModules[n[r]];if(o)return o}}return null},_resetEventPlugins:function(){s=null;for(var e in u)u.hasOwnProperty(e)&&delete u[e];l.plugins.length=0;var t=l.eventNameDispatchConfigs;for(var n in t)t.hasOwnProperty(n)&&delete t[n];var r=l.registrationNameModules;for(var o in r)r.hasOwnProperty(o)&&delete r[o]}};t.exports=l},{112:112,137:137}],18:[function(e,t,n){"use strict";function r(e){return"topMouseUp"===e||"topTouchEnd"===e||"topTouchCancel"===e}function o(e){return"topMouseMove"===e||"topTouchMove"===e}function i(e){return"topMouseDown"===e||"topTouchStart"===e}function a(e,t,n,r){var o=e.type||"unknown-event";e.currentTarget=g.getNodeFromInstance(r),t?m.invokeGuardedCallbackWithCatch(o,n,e):m.invokeGuardedCallback(o,n,e),e.currentTarget=null}function s(e,t){var n=e._dispatchListeners,r=e._dispatchInstances;if(Array.isArray(n))for(var o=0;o1?1-t:void 0;return this._fallbackText=o.slice(e,s),this._fallbackText}}),i.addPoolingTo(r),t.exports=r},{106:106,143:143,24:24}],21:[function(e,t,n){"use strict";var 
r=e(11),o=r.injection.MUST_USE_PROPERTY,i=r.injection.HAS_BOOLEAN_VALUE,a=r.injection.HAS_NUMERIC_VALUE,s=r.injection.HAS_POSITIVE_NUMERIC_VALUE,u=r.injection.HAS_OVERLOADED_BOOLEAN_VALUE,l={isCustomAttribute:RegExp.prototype.test.bind(new RegExp("^(data|aria)-["+r.ATTRIBUTE_NAME_CHAR+"]*$")),Properties:{accept:0,acceptCharset:0,accessKey:0,action:0,allowFullScreen:i,allowTransparency:0,alt:0,as:0,async:i,autoComplete:0,autoPlay:i,capture:i,cellPadding:0,cellSpacing:0,charSet:0,challenge:0,checked:o|i,cite:0,classID:0,className:0,cols:s,colSpan:0,content:0,contentEditable:0,contextMenu:0,controls:i,coords:0,crossOrigin:0,data:0,dateTime:0,default:i,defer:i,dir:0,disabled:i,download:u,draggable:0,encType:0,form:0,formAction:0,formEncType:0,formMethod:0,formNoValidate:i,formTarget:0,frameBorder:0,headers:0,height:0,hidden:i,high:0,href:0,hrefLang:0,htmlFor:0,httpEquiv:0,icon:0,id:0,inputMode:0,integrity:0,is:0,keyParams:0,keyType:0,kind:0,label:0,lang:0,list:0,loop:i,low:0,manifest:0,marginHeight:0,marginWidth:0,max:0,maxLength:0,media:0,mediaGroup:0,method:0,min:0,minLength:0,multiple:o|i,muted:o|i,name:0,nonce:0,noValidate:i,open:i,optimum:0,pattern:0,placeholder:0,playsInline:i,poster:0,preload:0,profile:0,radioGroup:0,readOnly:i,referrerPolicy:0,rel:0,required:i,reversed:i,role:0,rows:s,rowSpan:a,sandbox:0,scope:0,scoped:i,scrolling:0,seamless:i,selected:o|i,shape:0,size:s,sizes:0,span:s,spellCheck:0,src:0,srcDoc:0,srcLang:0,srcSet:0,start:a,step:0,style:0,summary:0,tabIndex:0,target:0,title:0,type:0,useMap:0,value:0,width:0,wmode:0,wrap:0,about:0,datatype:0,inlist:0,prefix:0,property:0,resource:0,typeof:0,vocab:0,autoCapitalize:0,autoCorrect:0,autoSave:0,color:0,itemProp:0,itemScope:i,itemType:0,itemID:0,itemRef:0,results:0,security:0,unselectable:0},DOMAttributeNames:{acceptCharset:"accept-charset",className:"class",htmlFor:"for",httpEquiv:"http-equiv"},DOMPropertyNames:{},DOMMutationMethods:{value:function(e,t){if(null==t)return 
e.removeAttribute("value");"number"!==e.type||!1===e.hasAttribute("value")?e.setAttribute("value",""+t):e.validity&&!e.validity.badInput&&e.ownerDocument.activeElement!==e&&e.setAttribute("value",""+t)}}};t.exports=l},{11:11}],22:[function(e,t,n){"use strict";function r(e){var t={"=":"=0",":":"=2"};return"$"+(""+e).replace(/[=:]/g,function(e){return t[e]})}function o(e){var t={"=0":"=","=2":":"};return(""+("."===e[0]&&"$"===e[1]?e.substring(2):e.substring(1))).replace(/(=0|=2)/g,function(e){return t[e]})}var i={escape:r,unescape:o};t.exports=i},{}],23:[function(e,t,n){"use strict";function r(e){null!=e.checkedLink&&null!=e.valueLink&&s("87")}function o(e){r(e),(null!=e.value||null!=e.onChange)&&s("88")}function i(e){r(e),(null!=e.checked||null!=e.onChange)&&s("89")}function a(e){if(e){var t=e.getName();if(t)return" Check the render method of `"+t+"`."}return""}var s=e(112),u=e(64),l=e(145),c=e(120),p=l(c.isValidElement),d=(e(137),e(142),{button:!0,checkbox:!0,image:!0,hidden:!0,radio:!0,reset:!0,submit:!0}),f={value:function(e,t,n){return!e[t]||d[e.type]||e.onChange||e.readOnly||e.disabled?null:new Error("You provided a `value` prop to a form field without an `onChange` handler. This will render a read-only field. If the field should be mutable use `defaultValue`. Otherwise, set either `onChange` or `readOnly`.")},checked:function(e,t,n){return!e[t]||e.onChange||e.readOnly||e.disabled?null:new Error("You provided a `checked` prop to a form field without an `onChange` handler. This will render a read-only field. If the field should be mutable use `defaultChecked`. 
Otherwise, set either `onChange` or `readOnly`.")},onChange:p.func},h={},m={checkPropTypes:function(e,t,n){for(var r in f){if(f.hasOwnProperty(r))var o=f[r](t,r,e,"prop",null,u);o instanceof Error&&!(o.message in h)&&(h[o.message]=!0,a(n))}},getValue:function(e){return e.valueLink?(o(e),e.valueLink.value):e.value},getChecked:function(e){return e.checkedLink?(i(e),e.checkedLink.value):e.checked},executeOnChange:function(e,t){return e.valueLink?(o(e),e.valueLink.requestChange(t.target.value)):e.checkedLink?(i(e),e.checkedLink.requestChange(t.target.checked)):e.onChange?e.onChange.call(void 0,t):void 0}};t.exports=m},{112:112,120:120,137:137,142:142,145:145,64:64}],24:[function(e,t,n){"use strict";var r=e(112),o=(e(137),function(e){var t=this;if(t.instancePool.length){var n=t.instancePool.pop();return t.call(n,e),n}return new t(e)}),i=function(e,t){var n=this;if(n.instancePool.length){var r=n.instancePool.pop();return n.call(r,e,t),r}return new n(e,t)},a=function(e,t,n){var r=this;if(r.instancePool.length){var o=r.instancePool.pop();return r.call(o,e,t,n),o}return new r(e,t,n)},s=function(e,t,n,r){var o=this;if(o.instancePool.length){var i=o.instancePool.pop();return o.call(i,e,t,n,r),i}return new o(e,t,n,r)},u=function(e){var t=this;e instanceof t||r("25"),e.destructor(),t.instancePool.length=0||null!=t.is}function h(e){var t=e.type;d(t),this._currentElement=e,this._tag=t.toLowerCase(),this._namespaceURI=null,this._renderedChildren=null,this._previousStyle=null,this._previousStyleCopy=null,this._hostNode=null,this._hostParent=null,this._rootNodeID=0,this._domID=0,this._hostContainerInfo=null,this._wrapperState=null,this._topLevelWrapper=null,this._flags=0}var 
m=e(112),v=e(143),g=e(2),y=e(5),_=e(9),C=e(10),b=e(11),E=e(12),x=e(16),w=e(17),T=e(25),k=e(32),P=e(33),S=e(38),N=e(39),M=e(40),I=e(43),O=(e(58),e(61)),R=e(68),A=(e(129),e(95)),D=(e(137),e(109),e(141),e(118),e(142),k),L=x.deleteListener,U=P.getNodeFromInstance,F=T.listenTo,j=w.registrationNameModules,V={string:!0,number:!0},B="__html",W={children:null,dangerouslySetInnerHTML:null,suppressContentEditableWarning:null},H=11,q={topAbort:"abort",topCanPlay:"canplay",topCanPlayThrough:"canplaythrough",topDurationChange:"durationchange",topEmptied:"emptied",topEncrypted:"encrypted",topEnded:"ended",topError:"error",topLoadedData:"loadeddata",topLoadedMetadata:"loadedmetadata",topLoadStart:"loadstart",topPause:"pause",topPlay:"play",topPlaying:"playing",topProgress:"progress",topRateChange:"ratechange",topSeeked:"seeked",topSeeking:"seeking",topStalled:"stalled",topSuspend:"suspend",topTimeUpdate:"timeupdate",topVolumeChange:"volumechange",topWaiting:"waiting"},K={area:!0,base:!0,br:!0,col:!0,embed:!0,hr:!0,img:!0,input:!0,keygen:!0,link:!0,meta:!0,param:!0,source:!0,track:!0,wbr:!0},z={listing:!0,pre:!0,textarea:!0},Y=v({menuitem:!0},K),X=/^[a-zA-Z][a-zA-Z:_\.\-\d]*$/,Q={},G={}.hasOwnProperty,$=1;h.displayName="ReactDOMComponent",h.Mixin={mountComponent:function(e,t,n,r){this._rootNodeID=$++,this._domID=n._idCounter++,this._hostParent=t,this._hostContainerInfo=n;var 
i=this._currentElement.props;switch(this._tag){case"audio":case"form":case"iframe":case"img":case"link":case"object":case"source":case"video":this._wrapperState={listeners:null},e.getReactMountReady().enqueue(c,this);break;case"input":S.mountWrapper(this,i,t),i=S.getHostProps(this,i),e.getReactMountReady().enqueue(c,this);break;case"option":N.mountWrapper(this,i,t),i=N.getHostProps(this,i);break;case"select":M.mountWrapper(this,i,t),i=M.getHostProps(this,i),e.getReactMountReady().enqueue(c,this);break;case"textarea":I.mountWrapper(this,i,t),i=I.getHostProps(this,i),e.getReactMountReady().enqueue(c,this)}o(this,i);var a,p;null!=t?(a=t._namespaceURI,p=t._tag):n._tag&&(a=n._namespaceURI,p=n._tag),(null==a||a===C.svg&&"foreignobject"===p)&&(a=C.html),a===C.html&&("svg"===this._tag?a=C.svg:"math"===this._tag&&(a=C.mathml)),this._namespaceURI=a;var d;if(e.useCreateElement){var f,h=n._ownerDocument;if(a===C.html)if("script"===this._tag){var m=h.createElement("div"),v=this._currentElement.type;m.innerHTML="<"+v+">",f=m.removeChild(m.firstChild)}else f=i.is?h.createElement(this._currentElement.type,i.is):h.createElement(this._currentElement.type);else f=h.createElementNS(a,this._currentElement.type);P.precacheNode(this,f),this._flags|=D.hasCachedChildNodes,this._hostParent||E.setAttributeForRoot(f),this._updateDOMProperties(null,i,e);var y=_(f);this._createInitialChildren(e,i,r,y),d=y}else{var b=this._createOpenTagMarkupAndPutListeners(e,i),x=this._createContentMarkup(e,i,r);d=!x&&K[this._tag]?b+"/>":b+">"+x+""}switch(this._tag){case"input":e.getReactMountReady().enqueue(s,this),i.autoFocus&&e.getReactMountReady().enqueue(g.focusDOMComponent,this);break;case"textarea":e.getReactMountReady().enqueue(u,this),i.autoFocus&&e.getReactMountReady().enqueue(g.focusDOMComponent,this);break;case"select":case"button":i.autoFocus&&e.getReactMountReady().enqueue(g.focusDOMComponent,this);break;case"option":e.getReactMountReady().enqueue(l,this)}return 
d},_createOpenTagMarkupAndPutListeners:function(e,t){var n="<"+this._currentElement.type;for(var r in t)if(t.hasOwnProperty(r)){var o=t[r];if(null!=o)if(j.hasOwnProperty(r))o&&i(this,r,o,e);else{"style"===r&&(o&&(o=this._previousStyleCopy=v({},t.style)),o=y.createMarkupForStyles(o,this));var a=null;null!=this._tag&&f(this._tag,t)?W.hasOwnProperty(r)||(a=E.createMarkupForCustomAttribute(r,o)):a=E.createMarkupForProperty(r,o),a&&(n+=" "+a)}}return e.renderToStaticMarkup?n:(this._hostParent||(n+=" "+E.createMarkupForRoot()),n+=" "+E.createMarkupForID(this._domID))},_createContentMarkup:function(e,t,n){var r="",o=t.dangerouslySetInnerHTML;if(null!=o)null!=o.__html&&(r=o.__html);else{var i=V[typeof t.children]?t.children:null,a=null!=i?null:t.children;if(null!=i)r=A(i);else if(null!=a){var s=this.mountChildren(a,e,n);r=s.join("")}}return z[this._tag]&&"\n"===r.charAt(0)?"\n"+r:r},_createInitialChildren:function(e,t,n,r){var o=t.dangerouslySetInnerHTML;if(null!=o)null!=o.__html&&_.queueHTML(r,o.__html);else{var i=V[typeof t.children]?t.children:null,a=null!=i?null:t.children;if(null!=i)""!==i&&_.queueText(r,i);else if(null!=a)for(var s=this.mountChildren(a,e,n),u=0;u"},receiveComponent:function(){},getHostNode:function(){return i.getNodeFromInstance(this)},unmountComponent:function(){i.uncacheNode(this)}}),t.exports=a},{143:143,33:33,9:9}],36:[function(e,t,n){"use strict";var r={useCreateElement:!0,useFiber:!1};t.exports=r},{}],37:[function(e,t,n){"use strict";var r=e(8),o=e(33),i={dangerouslyProcessChildrenUpdates:function(e,t){var n=o.getNodeFromInstance(e);r.processUpdates(n,t)}};t.exports=i},{33:33,8:8}],38:[function(e,t,n){"use strict";function r(){this._rootNodeID&&d.updateWrapper(this)}function o(e){return"checkbox"===e.type||"radio"===e.type?null!=e.checked:null!=e.value}function i(e){var t=this._currentElement.props,n=l.executeOnChange(t,e);p.asap(r,this);var o=t.name;if("radio"===t.type&&null!=o){for(var 
i=c.getNodeFromInstance(this),s=i;s.parentNode;)s=s.parentNode;for(var u=s.querySelectorAll("input[name="+JSON.stringify(""+o)+'][type="radio"]'),d=0;dt.end?(n=t.end,r=t.start):(n=t.start,r=t.end),o.moveToElementText(e),o.moveStart("character",n),o.setEndPoint("EndToStart",o),o.moveEnd("character",r-n),o.select()}function s(e,t){if(window.getSelection){var n=window.getSelection(),r=e[c()].length,o=Math.min(t.start,r),i=void 0===t.end?o:Math.min(t.end,r);if(!n.extend&&o>i){var a=i;i=o,o=a}var s=l(e,o),u=l(e,i);if(s&&u){var p=document.createRange();p.setStart(s.node,s.offset),n.removeAllRanges(),o>i?(n.addRange(p),n.extend(u.node,u.offset)):(p.setEnd(u.node,u.offset),n.addRange(p))}}}var u=e(123),l=e(105),c=e(106),p=u.canUseDOM&&"selection"in document&&!("getSelection"in window),d={getOffsets:p?o:i,setOffsets:p?a:s};t.exports=d},{105:105,106:106,123:123}],42:[function(e,t,n){"use strict";var r=e(112),o=e(143),i=e(8),a=e(9),s=e(33),u=e(95),l=(e(137),e(118),function(e){this._currentElement=e,this._stringText=""+e, +this._hostNode=null,this._hostParent=null,this._domID=0,this._mountIndex=0,this._closingComment=null,this._commentNodes=null});o(l.prototype,{mountComponent:function(e,t,n,r){var o=n._idCounter++,i=" react-text: "+o+" ";if(this._domID=o,this._hostParent=t,e.useCreateElement){var l=n._ownerDocument,c=l.createComment(i),p=l.createComment(" /react-text "),d=a(l.createDocumentFragment());return a.queueChild(d,a(c)),this._stringText&&a.queueChild(d,a(l.createTextNode(this._stringText))),a.queueChild(d,a(p)),s.precacheNode(this,c),this._closingComment=p,d}var f=u(this._stringText);return e.renderToStaticMarkup?f:""+f+""},receiveComponent:function(e,t){if(e!==this._currentElement){this._currentElement=e;var n=""+e;if(n!==this._stringText){this._stringText=n;var r=this.getHostNode();i.replaceDelimitedText(r[0],r[1],n)}}},getHostNode:function(){var e=this._commentNodes;if(e)return e;if(!this._closingComment)for(var 
t=s.getNodeFromInstance(this),n=t.nextSibling;;){if(null==n&&r("67",this._domID),8===n.nodeType&&" /react-text "===n.nodeValue){this._closingComment=n;break}n=n.nextSibling}return e=[this._hostNode,this._closingComment],this._commentNodes=e,e},unmountComponent:function(){this._closingComment=null,this._commentNodes=null,s.uncacheNode(this)}}),t.exports=l},{112:112,118:118,137:137,143:143,33:33,8:8,9:9,95:95}],43:[function(e,t,n){"use strict";function r(){this._rootNodeID&&c.updateWrapper(this)}function o(e){var t=this._currentElement.props,n=s.executeOnChange(t,e);return l.asap(r,this),n}var i=e(112),a=e(143),s=e(23),u=e(33),l=e(71),c=(e(137),e(142),{getHostProps:function(e,t){return null!=t.dangerouslySetInnerHTML&&i("91"),a({},t,{value:void 0,defaultValue:void 0,children:""+e._wrapperState.initialValue,onChange:e._wrapperState.onChange})},mountWrapper:function(e,t){var n=s.getValue(t),r=n;if(null==n){var a=t.defaultValue,u=t.children;null!=u&&(null!=a&&i("92"),Array.isArray(u)&&(u.length<=1||i("93"),u=u[0]),a=""+u),null==a&&(a=""),r=a}e._wrapperState={initialValue:""+r,listeners:null,onChange:o.bind(e)}},updateWrapper:function(e){var t=e._currentElement.props,n=u.getNodeFromInstance(e),r=s.getValue(t);if(null!=r){var o=""+r;o!==n.value&&(n.value=o),null==t.defaultValue&&(n.defaultValue=o)}null!=t.defaultValue&&(n.defaultValue=t.defaultValue)},postMountWrapper:function(e){var t=u.getNodeFromInstance(e),n=t.textContent;n===e._wrapperState.initialValue&&(t.value=n)}});t.exports=c},{112:112,137:137,142:142,143:143,23:23,33:33,71:71}],44:[function(e,t,n){"use strict";function r(e,t){"_hostNode"in e||u("33"),"_hostNode"in t||u("33");for(var n=0,r=e;r;r=r._hostParent)n++;for(var o=0,i=t;i;i=i._hostParent)o++;for(;n-o>0;)e=e._hostParent,n--;for(;o-n>0;)t=t._hostParent,o--;for(var a=n;a--;){if(e===t)return e;e=e._hostParent,t=t._hostParent}return null}function o(e,t){"_hostNode"in e||u("35"),"_hostNode"in 
t||u("35");for(;t;){if(t===e)return!0;t=t._hostParent}return!1}function i(e){return"_hostNode"in e||u("36"),e._hostParent}function a(e,t,n){for(var r=[];e;)r.push(e),e=e._hostParent;var o;for(o=r.length;o-- >0;)t(r[o],"captured",n);for(o=0;o0;)n(u[l],"captured",i)}var u=e(112);e(137);t.exports={isAncestor:o,getLowestCommonAncestor:r,getParentInstance:i,traverseTwoPhase:a,traverseEnterLeave:s}},{112:112,137:137}],45:[function(e,t,n){"use strict";var r=e(120),o=e(30),i=o;r.addons&&(r.__SECRET_INJECTED_REACT_DOM_DO_NOT_USE_OR_YOU_WILL_BE_FIRED=i),t.exports=i},{120:120,30:30}],46:[function(e,t,n){"use strict";function r(){this.reinitializeTransaction()}var o=e(143),i=e(71),a=e(89),s=e(129),u={initialize:s,close:function(){d.isBatchingUpdates=!1}},l={initialize:s,close:i.flushBatchedUpdates.bind(i)},c=[l,u];o(r.prototype,a,{getTransactionWrappers:function(){return c}});var p=new r,d={isBatchingUpdates:!1,batchedUpdates:function(e,t,n,r,o,i){var a=d.isBatchingUpdates;return d.isBatchingUpdates=!0,a?e(t,n,r,o,i):p.perform(e,null,t,n,r,o,i)}};t.exports=d},{129:129,143:143,71:71,89:89}],47:[function(e,t,n){"use strict";function r(){x||(x=!0,y.EventEmitter.injectReactEventListener(g),y.EventPluginHub.injectEventPluginOrder(s),y.EventPluginUtils.injectComponentTree(d),y.EventPluginUtils.injectTreeTraversal(h),y.EventPluginHub.injectEventPluginsByName({SimpleEventPlugin:E,EnterLeaveEventPlugin:u,ChangeEventPlugin:a,SelectEventPlugin:b,BeforeInputEventPlugin:i}),y.HostComponent.injectGenericComponentClass(p),y.HostComponent.injectTextComponentClass(m),y.DOMProperty.injectDOMPropertyConfig(o),y.DOMProperty.injectDOMPropertyConfig(l),y.DOMProperty.injectDOMPropertyConfig(C),y.EmptyComponent.injectEmptyComponentFactory(function(e){return new f(e)}),y.Updates.injectReconcileTransaction(_),y.Updates.injectBatchingStrategy(v),y.Component.injectEnvironment(c))}var 
o=e(1),i=e(3),a=e(7),s=e(14),u=e(15),l=e(21),c=e(27),p=e(31),d=e(33),f=e(35),h=e(44),m=e(42),v=e(46),g=e(52),y=e(55),_=e(65),C=e(73),b=e(74),E=e(75),x=!1;t.exports={inject:r}},{1:1,14:14,15:15,21:21,27:27,3:3,31:31,33:33,35:35,42:42,44:44,46:46,52:52,55:55,65:65,7:7,73:73,74:74,75:75}],48:[function(e,t,n){"use strict";var r="function"==typeof Symbol&&Symbol.for&&Symbol.for("react.element")||60103;t.exports=r},{}],49:[function(e,t,n){"use strict";var r,o={injectEmptyComponentFactory:function(e){r=e}},i={create:function(e){return r(e)}};i.injection=o,t.exports=i},{}],50:[function(e,t,n){"use strict";function r(e,t,n){try{t(n)}catch(e){null===o&&(o=e)}}var o=null,i={invokeGuardedCallback:r,invokeGuardedCallbackWithCatch:r,rethrowCaughtError:function(){if(o){var e=o;throw o=null,e}}};t.exports=i},{}],51:[function(e,t,n){"use strict";function r(e){o.enqueueEvents(e),o.processEventQueue(!1)}var o=e(16),i={handleTopLevel:function(e,t,n,i){r(o.extractEvents(e,t,n,i))}};t.exports=i},{16:16}],52:[function(e,t,n){"use strict";function r(e){for(;e._hostParent;)e=e._hostParent;var t=p.getNodeFromInstance(e),n=t.parentNode;return p.getClosestInstanceFromNode(n)}function o(e,t){this.topLevelType=e,this.nativeEvent=t,this.ancestors=[]}function i(e){var t=f(e.nativeEvent),n=p.getClosestInstanceFromNode(t),o=n;do{e.ancestors.push(o),o=o&&r(o)}while(o);for(var i=0;i/," "+i.CHECKSUM_ATTR_NAME+'="'+t+'"$&')},canReuseMarkup:function(e,t){var n=t.getAttribute(i.CHECKSUM_ATTR_NAME);return n=n&&parseInt(n,10),r(e)===n}};t.exports=i},{92:92}],60:[function(e,t,n){"use strict";function r(e,t){for(var n=Math.min(e.length,t.length),r=0;r.":"function"==typeof t?" Instead of passing a class like Foo, pass React.createElement(Foo) or .":null!=t&&void 0!==t.props?" 
This may be caused by unintentionally loading two independent copies of React.":"");var a,s=v.createElement(F,{child:t});if(e){var u=E.get(e);a=u._processChildContext(u._context)}else a=P;var c=d(n);if(c){var p=c._currentElement,h=p.props.child;if(M(h,t)){var m=c._renderedComponent.getPublicInstance(),g=r&&function(){r.call(m)};return j._updateRootComponent(c,s,a,n,g),m}j.unmountComponentAtNode(n)}var y=o(n),_=y&&!!i(y),C=l(n),b=_&&!c&&!C,x=j._renderNewRootComponent(s,n,b,a)._renderedComponent.getPublicInstance();return r&&r.call(x),x},render:function(e,t,n){return j._renderSubtreeIntoContainer(null,e,t,n)},unmountComponentAtNode:function(e){c(e)||f("40");var t=d(e);return t?(delete L[t._instance.rootID],k.batchedUpdates(u,t,e,!1),!0):(l(e),1===e.nodeType&&e.hasAttribute(O),!1)},_mountImageIntoNode:function(e,t,n,i,a){if(c(t)||f("41"),i){var s=o(t);if(x.canReuseMarkup(e,s))return void y.precacheNode(n,s);var u=s.getAttribute(x.CHECKSUM_ATTR_NAME);s.removeAttribute(x.CHECKSUM_ATTR_NAME);var l=s.outerHTML;s.setAttribute(x.CHECKSUM_ATTR_NAME,u);var p=e,d=r(p,l),m=" (client) "+p.substring(d-20,d+20)+"\n (server) "+l.substring(d-20,d+20);t.nodeType===A&&f("42",m)}if(t.nodeType===A&&f("43"),a.useCreateElement){for(;t.lastChild;)t.removeChild(t.lastChild);h.insertTreeBefore(t,e,null)}else N(t,e),y.precacheNode(n,t.firstChild)}};t.exports=j},{108:108,11:11,112:112,114:114,116:116,119:119,120:120,130:130,137:137,142:142,25:25,33:33,34:34,36:36,53:53,57:57,58:58,59:59,66:66,70:70,71:71,9:9}],61:[function(e,t,n){"use strict";function r(e,t,n){return{type:"INSERT_MARKUP",content:e,fromIndex:null,fromNode:null,toIndex:n,afterNode:t}}function o(e,t,n){return{type:"MOVE_EXISTING",content:null,fromIndex:e._mountIndex,fromNode:d.getHostNode(e),toIndex:n,afterNode:t}}function i(e,t){return{type:"REMOVE_NODE",content:null,fromIndex:e._mountIndex,fromNode:t,toIndex:null,afterNode:null}}function 
a(e){return{type:"SET_MARKUP",content:e,fromIndex:null,fromNode:null,toIndex:null,afterNode:null}}function s(e){return{type:"TEXT_CONTENT",content:e,fromIndex:null,fromNode:null,toIndex:null,afterNode:null}}function u(e,t){return t&&(e=e||[],e.push(t)),e}function l(e,t){p.processChildrenUpdates(e,t)}var c=e(112),p=e(28),d=(e(57),e(58),e(119),e(66)),f=e(26),h=(e(129),e(97)),m=(e(137),{Mixin:{_reconcilerInstantiateChildren:function(e,t,n){return f.instantiateChildren(e,t,n)},_reconcilerUpdateChildren:function(e,t,n,r,o,i){var a;return a=h(t,0),f.updateChildren(e,a,n,r,o,this,this._hostContainerInfo,i,0),a},mountChildren:function(e,t,n){var r=this._reconcilerInstantiateChildren(e,t,n);this._renderedChildren=r;var o=[],i=0;for(var a in r)if(r.hasOwnProperty(a)){var s=r[a],u=d.mountComponent(s,t,this,this._hostContainerInfo,n,0);s._mountIndex=i++,o.push(u)}return o},updateTextContent:function(e){var t=this._renderedChildren;f.unmountChildren(t,!1);for(var n in t)t.hasOwnProperty(n)&&c("118");l(this,[s(e)])},updateMarkup:function(e){var t=this._renderedChildren;f.unmountChildren(t,!1);for(var n in t)t.hasOwnProperty(n)&&c("118");l(this,[a(e)])},updateChildren:function(e,t,n){this._updateChildren(e,t,n)},_updateChildren:function(e,t,n){var r=this._renderedChildren,o={},i=[],a=this._reconcilerUpdateChildren(r,e,i,o,t,n);if(a||r){var s,c=null,p=0,f=0,h=0,m=null;for(s in a)if(a.hasOwnProperty(s)){var v=r&&r[s],g=a[s];v===g?(c=u(c,this.moveChild(v,m,p,f)),f=Math.max(v._mountIndex,f),v._mountIndex=p):(v&&(f=Math.max(v._mountIndex,f)),c=u(c,this._mountChildAtIndex(g,i[h],m,p,t,n)),h++),p++,m=d.getHostNode(g)}for(s in o)o.hasOwnProperty(s)&&(c=u(c,this._unmountChild(r[s],o[s])));c&&l(this,c),this._renderedChildren=a}},unmountChildren:function(e){var t=this._renderedChildren;f.unmountChildren(t,e),this._renderedChildren=null},moveChild:function(e,t,n,r){if(e._mountIndex0&&r.length<20?n+" (keys: "+r.join(", ")+")":n}function i(e,t){var n=s.get(e);return n||null}var 
a=e(112),s=(e(119),e(57)),u=(e(58),e(71)),l=(e(137),e(142),{isMounted:function(e){var t=s.get(e);return!!t&&!!t._renderedComponent},enqueueCallback:function(e,t,n){l.validateCallback(t,n);var o=i(e);if(!o)return null;o._pendingCallbacks?o._pendingCallbacks.push(t):o._pendingCallbacks=[t],r(o)},enqueueCallbackInternal:function(e,t){e._pendingCallbacks?e._pendingCallbacks.push(t):e._pendingCallbacks=[t],r(e)},enqueueForceUpdate:function(e){var t=i(e,"forceUpdate");t&&(t._pendingForceUpdate=!0,r(t))},enqueueReplaceState:function(e,t,n){var o=i(e,"replaceState");o&&(o._pendingStateQueue=[t],o._pendingReplaceState=!0,void 0!==n&&null!==n&&(l.validateCallback(n,"replaceState"),o._pendingCallbacks?o._pendingCallbacks.push(n):o._pendingCallbacks=[n]),r(o))},enqueueSetState:function(e,t){var n=i(e,"setState");n&&((n._pendingStateQueue||(n._pendingStateQueue=[])).push(t),r(n))},enqueueElementInternal:function(e,t,n){e._pendingElement=t,e._context=n,r(e)},validateCallback:function(e,t){e&&"function"!=typeof e&&a("122",t,o(e))}});t.exports=l},{112:112,119:119,137:137,142:142,57:57,58:58,71:71}],71:[function(e,t,n){"use strict";function r(){P.ReactReconcileTransaction&&b||c("123")}function o(){this.reinitializeTransaction(),this.dirtyComponentsLength=null,this.callbackQueue=d.getPooled(),this.reconcileTransaction=P.ReactReconcileTransaction.getPooled(!0)}function i(e,t,n,o,i,a){return r(),b.batchedUpdates(e,t,n,o,i,a)}function a(e,t){return e._mountOrder-t._mountOrder}function s(e){var t=e.dirtyComponentsLength;t!==g.length&&c("124",t,g.length),g.sort(a),y++;for(var n=0;n]/;t.exports=o},{}],96:[function(e,t,n){"use strict";function r(e){if(null==e)return null;if(1===e.nodeType)return e;var t=a.get(e);if(t)return t=s(t),t?i.getNodeFromInstance(t):null;"function"==typeof e.render?o("44"):o("45",Object.keys(e))}var o=e(112),i=(e(119),e(33)),a=e(57),s=e(103);e(137),e(142);t.exports=r},{103:103,112:112,119:119,137:137,142:142,33:33,57:57}],97:[function(e,t,n){(function(n){"use 
strict";function r(e,t,n,r){if(e&&"object"==typeof e){var o=e;void 0===o[n]&&null!=t&&(o[n]=t)}}function o(e,t){if(null==e)return e;var n={};return i(e,r,n),n}var i=(e(22),e(117));e(142);void 0!==n&&n.env,t.exports=o}).call(this,void 0)},{117:117,142:142,22:22}],98:[function(e,t,n){"use strict";function r(e,t,n){Array.isArray(e)?e.forEach(t,n):e&&t.call(n,e)}t.exports=r},{}],99:[function(e,t,n){"use strict";function r(e){var t,n=e.keyCode;return"charCode"in e?0===(t=e.charCode)&&13===n&&(t=13):t=n,t>=32||13===t?t:0}t.exports=r},{}],100:[function(e,t,n){"use strict";function r(e){if(e.key){var t=i[e.key]||e.key;if("Unidentified"!==t)return t}if("keypress"===e.type){var n=o(e);return 13===n?"Enter":String.fromCharCode(n)}return"keydown"===e.type||"keyup"===e.type?a[e.keyCode]||"Unidentified":""}var o=e(99),i={Esc:"Escape",Spacebar:" ",Left:"ArrowLeft",Up:"ArrowUp",Right:"ArrowRight",Down:"ArrowDown",Del:"Delete",Win:"OS",Menu:"ContextMenu",Apps:"ContextMenu",Scroll:"ScrollLock",MozPrintableKey:"Unidentified"},a={8:"Backspace",9:"Tab",12:"Clear",13:"Enter",16:"Shift",17:"Control",18:"Alt",19:"Pause",20:"CapsLock",27:"Escape",32:" ",33:"PageUp",34:"PageDown",35:"End",36:"Home",37:"ArrowLeft",38:"ArrowUp",39:"ArrowRight",40:"ArrowDown",45:"Insert",46:"Delete",112:"F1",113:"F2",114:"F3",115:"F4",116:"F5",117:"F6",118:"F7",119:"F8",120:"F9",121:"F10",122:"F11",123:"F12",144:"NumLock",145:"ScrollLock",224:"Meta"};t.exports=r},{99:99}],101:[function(e,t,n){"use strict";function r(e){var t=this,n=t.nativeEvent;if(n.getModifierState)return n.getModifierState(e);var r=i[e];return!!r&&!!n[r]}function o(e){return r}var i={Alt:"altKey",Control:"ctrlKey",Meta:"metaKey",Shift:"shiftKey"};t.exports=o},{}],102:[function(e,t,n){"use strict";function r(e){var t=e.target||e.srcElement||window;return t.correspondingUseElement&&(t=t.correspondingUseElement),3===t.nodeType?t.parentNode:t}t.exports=r},{}],103:[function(e,t,n){"use strict";function r(e){for(var 
t;(t=e._renderedNodeType)===o.COMPOSITE;)e=e._renderedComponent;return t===o.HOST?e._renderedComponent:t===o.EMPTY?null:void 0}var o=e(62);t.exports=r},{62:62}],104:[function(e,t,n){"use strict";function r(e){var t=e&&(o&&e[o]||e[i]);if("function"==typeof t)return t}var o="function"==typeof Symbol&&Symbol.iterator,i="@@iterator";t.exports=r},{}],105:[function(e,t,n){"use strict";function r(e){for(;e&&e.firstChild;)e=e.firstChild;return e}function o(e){for(;e;){if(e.nextSibling)return e.nextSibling;e=e.parentNode}}function i(e,t){for(var n=r(e),i=0,a=0;n;){if(3===n.nodeType){if(a=i+n.textContent.length,i<=t&&a>=t)return{node:n,offset:t-i};i=a}n=r(o(n))}}t.exports=i},{}],106:[function(e,t,n){"use strict";function r(){return!i&&o.canUseDOM&&(i="textContent"in document.documentElement?"textContent":"innerText"),i}var o=e(123),i=null;t.exports=r},{123:123}],107:[function(e,t,n){"use strict";function r(e,t){var n={};return n[e.toLowerCase()]=t.toLowerCase(),n["Webkit"+e]="webkit"+t,n["Moz"+e]="moz"+t,n["ms"+e]="MS"+t,n["O"+e]="o"+t.toLowerCase(),n}function o(e){if(s[e])return s[e];if(!a[e])return e;var t=a[e];for(var n in t)if(t.hasOwnProperty(n)&&n in u)return s[e]=t[n];return""}var i=e(123),a={animationend:r("Animation","AnimationEnd"),animationiteration:r("Animation","AnimationIteration"),animationstart:r("Animation","AnimationStart"),transitionend:r("Transition","TransitionEnd")},s={},u={};i.canUseDOM&&(u=document.createElement("div").style,"AnimationEvent"in window||(delete a.animationend.animation,delete a.animationiteration.animation,delete a.animationstart.animation),"TransitionEvent"in window||delete a.transitionend.transition),t.exports=o},{123:123}],108:[function(e,t,n){"use strict";function r(e){if(e){var t=e.getName();if(t)return" Check the render method of `"+t+"`."}return""}function o(e){return"function"==typeof e&&void 0!==e.prototype&&"function"==typeof e.prototype.mountComponent&&"function"==typeof e.prototype.receiveComponent}function i(e,t){var 
n;if(null===e||!1===e)n=l.create(i);else if("object"==typeof e){var s=e,u=s.type;if("function"!=typeof u&&"string"!=typeof u){var d="";d+=r(s._owner),a("130",null==u?u:typeof u,d)}"string"==typeof s.type?n=c.createInternalComponent(s):o(s.type)?(n=new s.type(s),n.getHostNode||(n.getHostNode=n.getNativeNode)):n=new p(s)}else"string"==typeof e||"number"==typeof e?n=c.createInstanceForText(e):a("131",typeof e);return n._mountIndex=0,n._mountImage=null,n}var a=e(112),s=e(143),u=e(29),l=e(49),c=e(54),p=(e(121),e(137),e(142),function(e){this.construct(e)});s(p.prototype,u,{_instantiateReactComponent:i}),t.exports=i},{112:112,121:121,137:137,142:142,143:143,29:29,49:49,54:54}],109:[function(e,t,n){"use strict";function r(e,t){if(!i.canUseDOM||t&&!("addEventListener"in document))return!1;var n="on"+e,r=n in document;if(!r){var a=document.createElement("div");a.setAttribute(n,"return;"),r="function"==typeof a[n]}return!r&&o&&"wheel"===e&&(r=document.implementation.hasFeature("Events.wheel","3.0")),r}var o,i=e(123);i.canUseDOM&&(o=document.implementation&&document.implementation.hasFeature&&!0!==document.implementation.hasFeature("","")),t.exports=r},{123:123}],110:[function(e,t,n){"use strict";function r(e){var t=e&&e.nodeName&&e.nodeName.toLowerCase();return"input"===t?!!o[e.type]:"textarea"===t}var o={color:!0,date:!0,datetime:!0,"datetime-local":!0,email:!0,month:!0,number:!0,password:!0,range:!0,search:!0,tel:!0,text:!0,time:!0,url:!0,week:!0};t.exports=r},{}],111:[function(e,t,n){"use strict";function r(e){return'"'+o(e)+'"'}var o=e(95);t.exports=r},{95:95}],112:[function(e,t,n){"use strict";function r(e){for(var t=arguments.length-1,n="Minified React error #"+e+"; visit http://facebook.github.io/react/docs/error-decoder.html?invariant="+e,r=0;r]/,u=e(93),l=u(function(e,t){if(e.namespaceURI!==i.svg||"innerHTML"in e)e.innerHTML=t;else{r=r||document.createElement("div"),r.innerHTML=""+t+"";for(var 
n=r.firstChild;n.firstChild;)e.appendChild(n.firstChild)}});if(o.canUseDOM){var c=document.createElement("div");c.innerHTML=" ",""===c.innerHTML&&(l=function(e,t){if(e.parentNode&&e.parentNode.replaceChild(e,e),a.test(t)||"<"===t[0]&&s.test(t)){e.innerHTML=String.fromCharCode(65279)+t;var n=e.firstChild;1===n.data.length?e.removeChild(n):n.deleteData(0,1)}else e.innerHTML=t}),c=null}t.exports=l},{10:10,123:123,93:93}],115:[function(e,t,n){"use strict";var r=e(123),o=e(95),i=e(114),a=function(e,t){if(t){var n=e.firstChild;if(n&&n===e.lastChild&&3===n.nodeType)return void(n.nodeValue=t)}e.textContent=t};r.canUseDOM&&("textContent"in document.documentElement||(a=function(e,t){if(3===e.nodeType)return void(e.nodeValue=t);i(e,o(t))})),t.exports=a},{114:114,123:123,95:95}],116:[function(e,t,n){"use strict";function r(e,t){var n=null===e||!1===e,r=null===t||!1===t;if(n||r)return n===r;var o=typeof e,i=typeof t;return"string"===o||"number"===o?"string"===i||"number"===i:"object"===i&&e.type===t.type&&e.key===t.key}t.exports=r},{}],117:[function(e,t,n){"use strict";function r(e,t){return e&&"object"==typeof e&&null!=e.key?l.escape(e.key):t.toString(36)}function o(e,t,n,i){var d=typeof e;if("undefined"!==d&&"boolean"!==d||(e=null),null===e||"string"===d||"number"===d||"object"===d&&e.$$typeof===s)return n(i,e,""===t?c+r(e,0):t),1;var f,h,m=0,v=""===t?c:t+p;if(Array.isArray(e))for(var g=0;g":"<"+e+">",s[e]=!a.firstChild),s[e]?d[e]:null}var o=e(123),i=e(137),a=o.canUseDOM?document.createElement("div"):null,s={},u=[1,'"],l=[1,"","
"],c=[3,"","
"],p=[1,'',""],d={"*":[1,"?
","
"],area:[1,"",""],col:[2,"","
"],legend:[1,"
","
"],param:[1,"",""],tr:[2,"","
"],optgroup:u,option:u,caption:l,colgroup:l,tbody:l,tfoot:l,thead:l,td:c,th:c};["circle","clipPath","defs","ellipse","g","image","line","linearGradient","mask","path","pattern","polygon","polyline","radialGradient","rect","stop","text","tspan"].forEach(function(e){d[e]=p,s[e]=!0}),t.exports=r},{123:123,137:137}],134:[function(e,t,n){"use strict";function r(e){return e.Window&&e instanceof e.Window?{x:e.pageXOffset||e.document.documentElement.scrollLeft,y:e.pageYOffset||e.document.documentElement.scrollTop}:{x:e.scrollLeft,y:e.scrollTop}}t.exports=r},{}],135:[function(e,t,n){"use strict";function r(e){return e.replace(o,"-$1").toLowerCase()}var o=/([A-Z])/g;t.exports=r},{}],136:[function(e,t,n){"use strict";function r(e){return o(e).replace(i,"-ms-")}var o=e(135),i=/^ms-/;t.exports=r},{135:135}],137:[function(e,t,n){"use strict";function r(e,t,n,r,i,a,s,u){if(o(t),!e){var l;if(void 0===t)l=new Error("Minified exception occurred; use the non-minified dev environment for the full error message and additional helpful warnings.");else{var c=[n,r,i,a,s,u],p=0;l=new Error(t.replace(/%s/g,function(){return c[p++]})),l.name="Invariant Violation"}throw l.framesToPop=1,l}}var o=function(e){};t.exports=r},{}],138:[function(e,t,n){"use strict";function r(e){var t=e?e.ownerDocument||e:document,n=t.defaultView||window;return!(!e||!("function"==typeof n.Node?e instanceof n.Node:"object"==typeof e&&"number"==typeof e.nodeType&&"string"==typeof e.nodeName))}t.exports=r},{}],139:[function(e,t,n){"use strict";function r(e){return o(e)&&3==e.nodeType}var o=e(138);t.exports=r},{138:138}],140:[function(e,t,n){"use strict";function r(e){var t={};return function(n){return t.hasOwnProperty(n)||(t[n]=e.call(this,n)),t[n]}}t.exports=r},{}],141:[function(e,t,n){"use strict";function r(e,t){return e===t?0!==e||0!==t||1/e==1/t:e!==e&&t!==t}function o(e,t){if(r(e,t))return!0;if("object"!=typeof e||null===e||"object"!=typeof t||null===t)return!1;var 
n=Object.keys(e),o=Object.keys(t);if(n.length!==o.length)return!1;for(var a=0;a 0x10FFFF || // not a valid Unicode code point + floor(codePoint) != codePoint // not an integer + ) { + throw RangeError('Invalid code point: ' + codePoint); + } + if (codePoint <= 0xFFFF) { // BMP code point + codeUnits.push(codePoint); + } else { // Astral code point; split in surrogate halves + // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + codePoint -= 0x10000; + highSurrogate = (codePoint >> 10) + 0xD800; + lowSurrogate = (codePoint % 0x400) + 0xDC00; + codeUnits.push(highSurrogate, lowSurrogate); + } + if (index + 1 == length || codeUnits.length > MAX_SIZE) { + result += stringFromCharCode.apply(null, codeUnits); + codeUnits.length = 0; + } + } + return result; + }; + if (defineProperty) { + defineProperty(String, 'fromCodePoint', { + 'value': fromCodePoint, + 'configurable': true, + 'writable': true + }); + } else { + String.fromCodePoint = fromCodePoint; + } + }()); +} + +/*! http://mths.be/codepointat v0.1.0 by @mathias */ +if (!String.prototype.codePointAt) { + (function() { + 'use strict'; // needed to support `apply`/`call` with `undefined`/`null` + var codePointAt = function(position) { + if (this == null) { + throw TypeError(); + } + var string = String(this); + var size = string.length; + // `ToInteger` + var index = position ? 
Number(position) : 0; + if (index != index) { // better `isNaN` + index = 0; + } + // Account for out-of-bounds indices: + if (index < 0 || index >= size) { + return undefined; + } + // Get the first code unit + var first = string.charCodeAt(index); + var second; + if ( // check if it’s the start of a surrogate pair + first >= 0xD800 && first <= 0xDBFF && // high surrogate + size > index + 1 // there is a next code unit + ) { + second = string.charCodeAt(index + 1); + if (second >= 0xDC00 && second <= 0xDFFF) { // low surrogate + // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + return (first - 0xD800) * 0x400 + second - 0xDC00 + 0x10000; + } + } + return first; + }; + if (Object.defineProperty) { + Object.defineProperty(String.prototype, 'codePointAt', { + 'value': codePointAt, + 'configurable': true, + 'writable': true + }); + } else { + String.prototype.codePointAt = codePointAt; + } + }()); +} + +function registerAsciinemaPlayerElement() { + var AsciinemaPlayerProto = Object.create(HTMLElement.prototype); + + function merge() { + var merged = {}; + for (var i=0; i>>0),ma=0;function na(a,b,c){return a.call.apply(a.bind,arguments)} +function oa(a,b,c){if(!a)throw Error();if(2b?1:0};var ua=Array.prototype.indexOf?function(a,b,c){return Array.prototype.indexOf.call(a,b,c)}:function(a,b,c){c=null==c?0:0>c?Math.max(0,a.length+c):c;if(ca(a))return ca(b)&&1==b.length?a.indexOf(b,c):-1;for(;cb?null:ca(a)?a.charAt(b):a[b]}function ya(a,b){var c=ua(a,b),d;(d=0<=c)&&Array.prototype.splice.call(a,c,1);return d}function za(a,b){a.sort(b||Aa)}function Ca(a,b){for(var c=Array(a.length),d=0;db?1:a2*this.Fc&&Na(this),!0):!1};function Na(a){if(a.Fc!=a.ib.length){for(var b=0,c=0;ba){var b=Ra[a];if(b)return b}b=new Qa([a|0],0>a?-1:0);-128<=a&&128>a&&(Ra[a]=b);return b}function Ta(a){if(isNaN(a)||!isFinite(a))return Ua;if(0>a)return Ta(-a).kb();for(var b=[],c=1,d=0;a>=c;d++)b[d]=a/c|0,c*=Va;return new Qa(b,0)}var 
Va=4294967296,Ua=Sa(0),Wa=Sa(1),Xa=Sa(16777216);g=Qa.prototype; +g.Of=function(){return 0a||36>>0).toString(a);c=e;if(c.hc())return f+d;for(;6>f.length;)f="0"+f;d=""+f+d}};function Ya(a,b){return 0>b?0:bthis.compare(Xa)};g.Ve=function(a){return 0>=this.compare(a)};g.compare=function(a){a=this.ze(a);return a.Eb()?-1:a.hc()?0:1};g.kb=function(){return this.Hf().add(Wa)}; +g.add=function(a){for(var b=Math.max(this.Ma.length,a.Ma.length),c=[],d=0,e=0;e<=b;e++){var f=d+(Ya(this,e)&65535)+(Ya(a,e)&65535),h=(f>>>16)+(Ya(this,e)>>>16)+(Ya(a,e)>>>16);d=h>>>16;f&=65535;h&=65535;c[e]=h<<16|f}return new Qa(c,c[c.length-1]&-2147483648?-1:0)};g.ze=function(a){return this.add(a.kb())}; +g.multiply=function(a){if(this.hc()||a.hc())return Ua;if(this.Eb())return a.Eb()?this.kb().multiply(a.kb()):this.kb().multiply(a).kb();if(a.Eb())return this.multiply(a.kb()).kb();if(this.Ue()&&a.Ue())return Ta(this.vd()*a.vd());for(var b=this.Ma.length+a.Ma.length,c=[],d=0;d<2*b;d++)c[d]=0;for(d=0;d>>16,h=Ya(this,d)&65535,k=Ya(a,e)>>>16,l=Ya(a,e)&65535;c[2*d+2*e]+=h*l;ab(c,2*d+2*e);c[2*d+2*e+1]+=f*l;ab(c,2*d+2*e+1);c[2*d+2*e+1]+= +h*k;ab(c,2*d+2*e+1);c[2*d+2*e+2]+=f*k;ab(c,2*d+2*e+2)}for(d=0;d>>16,a[b]&=65535,b++} +function Za(a,b){if(b.hc())throw Error("division by zero");if(a.hc())return Ua;if(a.Eb())return b.Eb()?Za(a.kb(),b.kb()):Za(a.kb(),b).kb();if(b.Eb())return Za(a,b.kb()).kb();if(30=f?1:Math.pow(2,f-48);h=Ta(e);for(var k=h.multiply(b);k.Eb()||k.xf(d);)e-=f,h=Ta(e),k=h.multiply(b);h.hc()&&(h=Wa);c=c.add(h);d=d.ze(k)}return c}g.Hf=function(){for(var a=this.Ma.length,b=[],c=0;c>5;a%=32;for(var c=this.Ma.length+b+(0>>32-a:Ya(this,e-b);return new Qa(d,this.Lc)}; +g.ad=function(a){var b=a>>5;a%=32;for(var c=this.Ma.length-b,d=[],e=0;e>>a|Ya(this,e+b+1)<<32-a:Ya(this,e+b);return new Qa(d,this.Lc)};function cb(a,b){null!=a&&this.append.apply(this,arguments)}g=cb.prototype;g.xc="";g.set=function(a){this.xc=""+a};g.append=function(a,b,c){this.xc+=String(a);if(null!=b)for(var 
d=1;d>>16&65535)*d+c*(b>>>16&65535)<<16>>>0)|0};function hd(a){a=gd(a|0,-862048943);return gd(a<<15|a>>>-15,461845907)} +function id(a,b){var c=(a|0)^(b|0);return gd(c<<13|c>>>-13,5)+-430675100|0}function jd(a,b){var c=(a|0)^b;c=gd(c^c>>>16,-2048144789);c=gd(c^c>>>13,-1028477387);return c^c>>>16}function kd(a){a:{var b=1;for(var c=0;;)if(b>2)}function qd(a){return a instanceof rd} +function sd(a,b){if(a.Zb===b.Zb)return 0;var c=wb(a.fb);if(t(c?b.fb:c))return-1;if(t(a.fb)){if(wb(b.fb))return 1;c=Aa(a.fb,b.fb);return 0===c?Aa(a.name,b.name):c}return Aa(a.name,b.name)}function rd(a,b,c,d,e){this.fb=a;this.name=b;this.Zb=c;this.Oc=d;this.hb=e;this.m=2154168321;this.J=4096}g=rd.prototype;g.toString=function(){return this.Zb};g.equiv=function(a){return this.K(null,a)};g.K=function(a,b){return b instanceof rd?this.Zb===b.Zb:!1}; +g.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 2:return D.c(c,this);case 3:return D.l(c,this,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.c=function(a,c){return D.c(c,this)};a.l=function(a,c,d){return D.l(c,this,d)};return a}();g.apply=function(a,b){return this.call.apply(this,[this].concat(Gb(b)))};g.h=function(a){return D.c(a,this)};g.c=function(a,b){return D.l(a,this,b)};g.P=function(){return this.hb}; +g.T=function(a,b){return new rd(this.fb,this.name,this.Zb,this.Oc,b)};g.U=function(){var a=this.Oc;return null!=a?a:this.Oc=a=pd(kd(this.name),nd(this.fb))};g.hd=function(){return this.name};g.jd=function(){return this.fb};g.R=function(a,b){return Jc(b,this.Zb)};var td=function td(a){switch(arguments.length){case 1:return td.h(arguments[0]);case 2:return td.c(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",v.h(arguments.length)].join(""));}}; +td.h=function(a){if(a instanceof rd)return a;var b=a.indexOf("/");return 1>b?td.c(null,a):td.c(a.substring(0,b),a.substring(b+1,a.length))};td.c=function(a,b){var c=null!=a?[v.h(a),"/",v.h(b)].join(""):b;return new 
rd(a,b,c,null,null)};td.L=2;function ud(a){return null!=a?a.J&131072||q===a.Tf?!0:a.J?!1:Ab(cd,a):Ab(cd,a)} +function E(a){if(null==a)return null;if(null!=a&&(a.m&8388608||q===a.Pe))return a.S(null);if(vb(a)||"string"===typeof a)return 0===a.length?null:new Jb(a,0,null);if(Ab(Bc,a))return Cc(a);throw Error([v.h(a)," is not ISeqable"].join(""));}function y(a){if(null==a)return null;if(null!=a&&(a.m&64||q===a.G))return a.Ia(null);a=E(a);return null==a?null:Wb(a)}function vd(a){return null!=a?null!=a&&(a.m&64||q===a.G)?a.bb(null):(a=E(a))?Yb(a):wd:wd} +function z(a){return null==a?null:null!=a&&(a.m&128||q===a.Id)?a.Ka(null):E(vd(a))}var G=function G(a){switch(arguments.length){case 1:return G.h(arguments[0]);case 2:return G.c(arguments[0],arguments[1]);default:for(var c=[],d=arguments.length,e=0;;)if(e=d)return-1;!(0c&&(c+=d,c=0>c?0:c);for(;;)if(cc?d+c:c;for(;;)if(0<=c){if(G.c(Vd(a,c),b))return c;--c}else return-1}function Yd(a,b){this.o=a;this.i=b} +Yd.prototype.ja=function(){return this.ia?0:a};g.Rc=function(){var a=this.W(null);return 0d)c=1;else if(0===c)c=0;else a:for(d=0;;){var e=Ke(Vd(a,d),Vd(b,d));if(0===e&&d+1>1&1431655765;a=(a&858993459)+(a>>2&858993459);return 16843009*(a+(a>>4)&252645135)>>24} +var v=function v(a){switch(arguments.length){case 0:return v.B();case 1:return v.h(arguments[0]);default:for(var c=[],d=arguments.length,e=0;;)if(ed:e))c[d]=a.next(),d+=1;else return qf(new nf(c,0,d),Rf.h?Rf.h(a):Rf.call(null,a))}else return null},null,null)};function Sf(a,b,c,d,e,f){this.buffer=a;this.ub=b;this.pe=c;this.Rb=d;this.ye=e;this.Gf=f} +Sf.prototype.step=function(){if(this.ub!==Nf)return!0;for(;;)if(this.ub===Nf)if(this.buffer.Td()){if(this.pe)return!1;if(this.ye.ja()){if(this.Gf)var a=P(this.Rb,ae(null,this.ye.next()));else a=this.ye.next(),a=this.Rb.c?this.Rb.c(null,a):this.Rb.call(null,null,a);Hd(a)&&(this.Rb.h?this.Rb.h(null):this.Rb.call(null,null),this.pe=!0)}else this.Rb.h?this.Rb.h(null):this.Rb.call(null,null),this.pe=!0}else 
this.ub=this.buffer.remove();else return!0};Sf.prototype.ja=function(){return this.step()}; +Sf.prototype.next=function(){if(this.ja()){var a=this.ub;this.ub=Nf;return a}throw Error("No such element");};Sf.prototype.remove=function(){return Error("Unsupported operation")};Sf.prototype[Fb]=function(){return yd(this)}; +function Tf(a,b){var c=new Sf(Qf,Nf,!1,null,b,!1);c.Rb=function(){var b=function(a){return function(){function b(b,c){a.buffer=a.buffer.add(c);return b}var c=null;c=function(a,c){switch(arguments.length){case 0:return null;case 1:return a;case 2:return b.call(this,a,c)}throw Error("Invalid arity: "+(arguments.length-1));};c.B=function(){return null};c.h=function(a){return a};c.c=b;return c}()}(c);return a.h?a.h(b):a.call(null,b)}();return c} +function Uf(a,b){var c=Kf(b);c=Tf(a,c);c=Rf(c);return t(c)?c:wd}function Vf(a,b){for(;;){if(null==E(b))return!0;var c=y(b);c=a.h?a.h(c):a.call(null,c);if(t(c)){c=a;var d=z(b);a=c;b=d}else return!1}}function Wf(a,b){for(;;)if(E(b)){var c=y(b);c=a.h?a.h(c):a.call(null,c);if(t(c))return c;c=a;var d=z(b);a=c;b=d}else return null}function Xf(a){if(Ge(a))return 0===(a&1);throw Error(["Argument must be an integer: ",v.h(a)].join(""));} +function Yf(a){return function(){function b(b,c){return wb(a.c?a.c(b,c):a.call(null,b,c))}function c(b){return wb(a.h?a.h(b):a.call(null,b))}function d(){return wb(a.B?a.B():a.call(null))}var e=null,f=function(){function b(a,b,d){var e=null;if(2a?0:a-1>>>5<<5}function Jg(a,b,c){for(;;){if(0===b)return c;var d=Gg(a);d.o[0]=c;c=d;b-=5}} +var Kg=function Kg(a,b,c,d){var f=Hg(c),h=a.F-1>>>b&31;5===b?f.o[h]=d:(c=c.o[h],null!=c?(b-=5,a=Kg.M?Kg.M(a,b,c,d):Kg.call(null,a,b,c,d)):a=Jg(null,b-5,d),f.o[h]=a);return f};function Lg(a,b){throw Error(["No item ",v.h(a)," in vector of length ",v.h(b)].join(""));}function Mg(a,b){if(b>=Ig(a))return a.fa;for(var c=a.root,d=a.shift;;)if(0>>d&31];d=e}else return c.o} +var Ng=function Ng(a,b,c,d,e){var h=Hg(c);if(0===b)h.o[d&31]=e;else{var 
k=d>>>b&31;b-=5;c=c.o[k];a=Ng.Z?Ng.Z(a,b,c,d,e):Ng.call(null,a,b,c,d,e);h.o[k]=a}return h},Og=function Og(a,b,c){var e=a.F-2>>>b&31;if(5=this.F)a=new Jb(this.fa,0,null);else{a:{a=this.root;for(var b=this.shift;;)if(0this.F-Ig(this)){for(var c=this.fa.length,d=Array(c+1),e=0;;)if(e>>5>1<b)return new R(null,b,5,T,a,null);for(var c=32,d=(new R(null,32,5,T,a.slice(0,32),null)).Pc(null);;)if(cb||this.end<=this.start+b?Lg(b,this.end-this.start):A.c(this.Ja,this.start+b)};g.ka=function(a,b,c){return 0>b||this.end<=this.start+b?c:A.l(this.Ja,this.start+b,c)}; +g.dc=function(a,b,c){a=this.start+b;if(0>b||this.end+1<=a)throw Error(["Index ",v.h(b)," out of bounds [0,",v.h(this.W(null)),"]"].join(""));b=this.meta;c=K.l(this.Ja,a,c);var d=this.end;a+=1;return Zg(b,c,this.start,d>a?d:a,null)};g.ba=function(){return null!=this.Ja&&q===this.Ja.fe?Qg(this.Ja,this.start,this.end):new Jf(Hf,this)};g.P=function(){return this.meta};g.W=function(){return this.end-this.start};g.Ac=function(){return A.c(this.Ja,this.end-1)}; +g.Bc=function(){if(this.start===this.end)throw Error("Can't pop empty vector");return Zg(this.meta,this.Ja,this.start,this.end-1,null)};g.Rc=function(){return this.start!==this.end?new Zd(this,this.end-this.start-1,null):null};g.U=function(){var a=this.w;return null!=a?a:this.w=a=Ad(this)};g.K=function(a,b){return $d(this,b)};g.oa=function(){return tc(he,this.meta)};g.Fa=function(a,b){return null!=this.Ja&&q===this.Ja.fe?Rg(this.Ja,b,this.start,this.end):Kd(this,b)}; +g.Ga=function(a,b,c){return null!=this.Ja&&q===this.Ja.fe?Sg(this.Ja,b,c,this.start,this.end):Ld(this,b,c)};g.O=function(a,b,c){if("number"===typeof b)return this.dc(null,b,c);throw Error("Subvec's key for assoc must be a number.");};g.S=function(){var a=this;return function(b){return function e(d){return d===a.end?null:ae(A.c(a.Ja,d),new kf(null,function(){return function(){return e(d+1)}}(b),null,null))}}(this)(a.start)};g.T=function(a,b){return Zg(b,this.Ja,this.start,this.end,this.w)}; 
+g.X=function(a,b){return Zg(this.meta,qc(this.Ja,this.end,b),this.start,this.end+1,null)};g.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 2:return this.$(null,c);case 3:return this.ka(null,c,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.c=function(a,c){return this.$(null,c)};a.l=function(a,c,d){return this.ka(null,c,d)};return a}();g.apply=function(a,b){return this.call.apply(this,[this].concat(Gb(b)))};g.h=function(a){return this.$(null,a)}; +g.c=function(a,b){return this.ka(null,a,b)};Yg.prototype[Fb]=function(){return yd(this)};function Zg(a,b,c,d,e){for(;;)if(b instanceof Yg)c=b.start+c,d=b.start+d,b=b.Ja;else{if(!ze(b))throw Error("v must satisfy IVector");var f=H(b);if(0>c||0>d||c>f||d>f)throw Error("Index out of bounds");return new Yg(a,b,c,d,e)}}function $g(a,b){return a===b.la?b:new Fg(a,Gb(b.o))} +var ah=function ah(a,b,c,d){c=$g(a.root.la,c);var f=a.F-1>>>b&31;if(5===b)a=d;else{var h=c.o[f];null!=h?(b-=5,a=ah.M?ah.M(a,b,h,d):ah.call(null,a,b,h,d)):a=Jg(a.root.la,b-5,d)}c.o[f]=a;return c};function Tg(a,b,c,d){this.F=a;this.shift=b;this.root=c;this.fa=d;this.J=88;this.m=275}g=Tg.prototype; +g.Dc=function(a,b){if(this.root.la){if(32>this.F-Ig(this))this.fa[this.F&31]=b;else{var c=new Fg(this.root.la,this.fa),d=[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null];d[0]=b;this.fa=d;if(this.F>>>5>1<>>d&31,m=k(d-5,f.o[p]);f.o[p]=m}return f}}(a)(a.shift,a.root)}();a.root=d}return a}if(b===a.F)return a.Dc(null,c);throw Error(["Index ",v.h(b)," out of bounds for TransientVector of length",v.h(a.F)].join(""));}throw Error("assoc! 
after persistent!");} +g.W=function(){if(this.root.la)return this.F;throw Error("count after persistent!");};g.$=function(a,b){if(this.root.la)return(0<=b&&b=c)return new r(this.meta,this.F-1,d,null);G.c(b,this.o[e])||(d[f]=this.o[e],d[f+1]=this.o[e+1],f+=2);e+=2}}else return this}; +g.O=function(a,b,c){a=ih(this.o,b);if(-1===a){if(this.Fb?4:2*(b+1));Be(this.o,0,c,0,2*b);return new xh(a,this.na,c)};g.qd=function(){return yh(this.o,0,null)};g.Jc=function(a,b){return vh(this.o,a,b)};g.sc=function(a,b,c,d){var e=1<<(b>>>a&31);if(0===(this.na&e))return d;var f=$e(this.na&e-1);e=this.o[2*f];f=this.o[2*f+1];return null==e?f.sc(a+5,b,c,d):rh(c,e)?f:d}; +g.Kb=function(a,b,c,d,e,f){var h=1<<(c>>>b&31),k=$e(this.na&h-1);if(0===(this.na&h)){var l=$e(this.na);if(2*l>>b&31]=zh.Kb(a,b+5,c,d,e,f);for(e=d=0;;)if(32>d)0!== +(this.na>>>d&1)&&(k[d]=null!=this.o[e]?zh.Kb(a,b+5,od(this.o[e]),this.o[e],this.o[e+1],f):this.o[e+1],e+=2),d+=1;else break;return new Ah(a,l+1,k)}b=Array(2*(l+4));Be(this.o,0,b,0,2*k);b[2*k]=d;b[2*k+1]=e;Be(this.o,2*k,b,2*(k+1),2*(l-k));f.H=!0;a=this.Gc(a);a.o=b;a.na|=h;return a}l=this.o[2*k];h=this.o[2*k+1];if(null==l)return l=h.Kb(a,b+5,c,d,e,f),l===h?this:uh(this,a,2*k+1,l);if(rh(d,l))return e===h?this:uh(this,a,2*k+1,e);f.H=!0;f=b+5;b=od(l);if(b===c)e=new Bh(null,b,2,[l,h,d,e]);else{var p=new qh; +e=zh.Kb(a,f,b,l,h,p).Kb(a,f,c,d,e,p)}d=2*k;k=2*k+1;a=this.Gc(a);a.o[d]=null;a.o[k]=e;return a}; +g.Jb=function(a,b,c,d,e){var f=1<<(b>>>a&31),h=$e(this.na&f-1);if(0===(this.na&f)){var k=$e(this.na);if(16<=k){h=[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null];h[b>>>a&31]=zh.Jb(a+5,b,c,d,e);for(d=c=0;;)if(32>c)0!==(this.na>>>c&1)&&(h[c]=null!=this.o[d]?zh.Jb(a+5,od(this.o[d]),this.o[d],this.o[d+1],e):this.o[d+1],d+=2),c+=1;else break;return new Ah(null,k+1,h)}a=Array(2*(k+1));Be(this.o, 
+0,a,0,2*h);a[2*h]=c;a[2*h+1]=d;Be(this.o,2*h,a,2*(h+1),2*(k-h));e.H=!0;return new xh(null,this.na|f,a)}var l=this.o[2*h];f=this.o[2*h+1];if(null==l)return k=f.Jb(a+5,b,c,d,e),k===f?this:new xh(null,this.na,sh(this.o,2*h+1,k));if(rh(c,l))return d===f?this:new xh(null,this.na,sh(this.o,2*h+1,d));e.H=!0;e=this.na;k=this.o;a+=5;var p=od(l);if(p===b)c=new Bh(null,p,2,[l,f,c,d]);else{var m=new qh;c=zh.Jb(a,p,l,f,m).Jb(a,b,c,d,m)}a=2*h;h=2*h+1;d=Gb(k);d[a]=null;d[h]=c;return new xh(null,e,d)}; +g.rd=function(a,b,c){var d=1<<(b>>>a&31);if(0===(this.na&d))return this;var e=$e(this.na&d-1),f=this.o[2*e],h=this.o[2*e+1];return null==f?(a=h.rd(a+5,b,c),a===h?this:null!=a?new xh(null,this.na,sh(this.o,2*e+1,a)):this.na===d?null:new xh(null,this.na^d,th(this.o,e))):rh(c,f)?new xh(null,this.na^d,th(this.o,e)):this};g.ba=function(){return new wh(this.o,0,null,null)};var zh=new xh(null,0,[]);function Ch(a,b,c){this.o=a;this.i=b;this.Lb=c} +Ch.prototype.ja=function(){for(var a=this.o.length;;){if(null!=this.Lb&&this.Lb.ja())return!0;if(this.i>>a&31];return null!=e?e.sc(a+5,b,c,d):d};g.Kb=function(a,b,c,d,e,f){var h=c>>>b&31,k=this.o[h];if(null==k)return a=uh(this,a,h,zh.Kb(a,b+5,c,d,e,f)),a.F+=1,a;b=k.Kb(a,b+5,c,d,e,f);return b===k?this:uh(this,a,h,b)}; +g.Jb=function(a,b,c,d,e){var f=b>>>a&31,h=this.o[f];if(null==h)return new Ah(null,this.F+1,sh(this.o,f,zh.Jb(a+5,b,c,d,e)));a=h.Jb(a+5,b,c,d,e);return a===h?this:new Ah(null,this.F,sh(this.o,f,a))}; +g.rd=function(a,b,c){var d=b>>>a&31,e=this.o[d];if(null!=e){a=e.rd(a+5,b,c);if(a===e)d=this;else if(null==a)if(8>=this.F)a:{e=this.o;a=e.length;b=Array(2*(this.F-1));c=0;for(var f=1,h=0;;)if(ca?d:rh(c,this.o[a])?this.o[a+1]:d}; +g.Kb=function(a,b,c,d,e,f){if(c===this.ec){b=Eh(this.o,this.F,d);if(-1===b){if(this.o.length>2*this.F)return b=2*this.F,c=2*this.F+1,a=this.Gc(a),a.o[b]=d,a.o[c]=e,f.H=!0,a.F+=1,a;c=this.o.length;b=Array(c+2);Be(this.o,0,b,0,c);b[c]=d;b[c+1]=e;f.H=!0;d=this.F+1;a===this.la?(this.o=b,this.F=d,a=this):a=new 
Bh(this.la,this.ec,d,b);return a}return this.o[b+1]===e?this:uh(this,a,b+1,e)}return(new xh(a,1<<(this.ec>>>b&31),[null,this,null,null])).Kb(a,b,c,d,e,f)}; +g.Jb=function(a,b,c,d,e){return b===this.ec?(a=Eh(this.o,this.F,c),-1===a?(a=2*this.F,b=Array(a+2),Be(this.o,0,b,0,a),b[a]=c,b[a+1]=d,e.H=!0,new Bh(null,this.ec,this.F+1,b)):G.c(this.o[a+1],d)?this:new Bh(null,this.ec,this.F,sh(this.o,a+1,d))):(new xh(null,1<<(this.ec>>>a&31),[null,this])).Jb(a,b,c,d,e)};g.rd=function(a,b,c){a=Eh(this.o,this.F,c);return-1===a?this:1===this.F?null:new Bh(null,this.ec,this.F-1,th(this.o,Ze(a)))};g.ba=function(){return new wh(this.o,0,null,null)}; +function Fh(a,b,c,d,e){this.meta=a;this.Mb=b;this.i=c;this.s=d;this.w=e;this.m=32374988;this.J=0}g=Fh.prototype;g.toString=function(){return fd(this)};g.equiv=function(a){return this.K(null,a)};g.indexOf=function(){var a=null;a=function(a,c){switch(arguments.length){case 1:return Ud(this,a,0);case 2:return Ud(this,a,c)}throw Error("Invalid arity: "+(arguments.length-1));};a.h=function(a){return Ud(this,a,0)};a.c=function(a,c){return Ud(this,a,c)};return a}(); +g.lastIndexOf=function(){function a(a){return Xd(this,a,H(this))}var b=null;b=function(b,d){switch(arguments.length){case 1:return a.call(this,b);case 2:return Xd(this,b,d)}throw Error("Invalid arity: "+(arguments.length-1));};b.h=a;b.c=function(a,b){return Xd(this,a,b)};return b}();g.P=function(){return this.meta};g.Ka=function(){return null==this.s?yh(this.Mb,this.i+2,null):yh(this.Mb,this.i,z(this.s))};g.U=function(){var a=this.w;return null!=a?a:this.w=a=Ad(this)}; +g.K=function(a,b){return $d(this,b)};g.oa=function(){return tc(wd,this.meta)};g.Fa=function(a,b){return ce(b,this)};g.Ga=function(a,b,c){return de(b,c,this)};g.Ia=function(){return null==this.s?new R(null,2,5,T,[this.Mb[this.i],this.Mb[this.i+1]],null):y(this.s)};g.bb=function(){var a=null==this.s?yh(this.Mb,this.i+2,null):yh(this.Mb,this.i,z(this.s));return null!=a?a:wd};g.S=function(){return 
this};g.T=function(a,b){return new Fh(b,this.Mb,this.i,this.s,this.w)};g.X=function(a,b){return ae(b,this)}; +Fh.prototype[Fb]=function(){return yd(this)};function yh(a,b,c){if(null==c)for(c=a.length;;)if(bthis.F?H(z(this))+1:this.F};g.U=function(){var a=this.w;return null!=a?a:this.w=a=Ad(this)};g.K=function(a,b){return $d(this,b)};g.oa=function(){return tc(wd,this.meta)};g.Fa=function(a,b){return ce(b,this)};g.Ga=function(a,b,c){return de(b,c,this)};g.Ia=function(){var a=this.stack;return null==a?null:nc(a)};g.bb=function(){var a=y(this.stack);a=Mh(this.vc?a.right:a.left,z(this.stack),this.vc);return null!=a?new Nh(null,a,this.vc,this.F-1,null):wd};g.S=function(){return this}; +g.T=function(a,b){return new Nh(b,this.stack,this.vc,this.F,this.w)};g.X=function(a,b){return ae(b,this)};Nh.prototype[Fb]=function(){return yd(this)};function Oh(a,b,c){return new Nh(null,Mh(a,null,b),b,c,null)} +function Ph(a,b,c,d){return c instanceof Qh?c.left instanceof Qh?new Qh(c.key,c.H,c.left.bc(),new Rh(a,b,c.right,d,null),null):c.right instanceof Qh?new Qh(c.right.key,c.right.H,new Rh(c.key,c.H,c.left,c.right.left,null),new Rh(a,b,c.right.right,d,null),null):new Rh(a,b,c,d,null):new Rh(a,b,c,d,null)} +function Sh(a,b,c,d){return d instanceof Qh?d.right instanceof Qh?new Qh(d.key,d.H,new Rh(a,b,c,d.left,null),d.right.bc(),null):d.left instanceof Qh?new Qh(d.left.key,d.left.H,new Rh(a,b,c,d.left.left,null),new Rh(d.key,d.H,d.left.right,d.right,null),null):new Rh(a,b,c,d,null):new Rh(a,b,c,d,null)} +function Th(a,b,c,d){if(c instanceof Qh)return new Qh(a,b,c.bc(),d,null);if(d instanceof Rh)return Sh(a,b,c,d.ud());if(d instanceof Qh&&d.left instanceof Rh)return new Qh(d.left.key,d.left.H,new Rh(a,b,c,d.left.left,null),Sh(d.key,d.H,d.left.right,d.right.ud()),null);throw Error("red-black tree invariant violation");} +function Uh(a,b,c,d){if(d instanceof Qh)return new Qh(a,b,c,d.bc(),null);if(c instanceof Rh)return Ph(a,b,c.ud(),d);if(c instanceof Qh&&c.right instanceof Rh)return new 
Qh(c.right.key,c.right.H,Ph(c.key,c.H,c.left.ud(),c.right.left),new Rh(a,b,c.right.right,d,null),null);throw Error("red-black tree invariant violation");} +var Vh=function Vh(a,b,c){var e=null!=a.left?function(){var e=a.left;return Vh.l?Vh.l(e,b,c):Vh.call(null,e,b,c)}():c;if(Hd(e))return e;var f=function(){var c=a.key,f=a.H;return b.l?b.l(e,c,f):b.call(null,e,c,f)}();if(Hd(f))return f;if(null!=a.right){var h=a.right;return Vh.l?Vh.l(h,b,f):Vh.call(null,h,b,f)}return f};function Rh(a,b,c,d,e){this.key=a;this.H=b;this.left=c;this.right=d;this.w=e;this.m=32402207;this.J=0}g=Rh.prototype; +g.lastIndexOf=function(){function a(a){return Xd(this,a,H(this))}var b=null;b=function(b,d){switch(arguments.length){case 1:return a.call(this,b);case 2:return Xd(this,b,d)}throw Error("Invalid arity: "+(arguments.length-1));};b.h=a;b.c=function(a,b){return Xd(this,a,b)};return b}(); +g.indexOf=function(){var a=null;a=function(a,c){switch(arguments.length){case 1:return Ud(this,a,0);case 2:return Ud(this,a,c)}throw Error("Invalid arity: "+(arguments.length-1));};a.h=function(a){return Ud(this,a,0)};a.c=function(a,c){return Ud(this,a,c)};return a}();g.Ee=function(a){return a.He(this)};g.ud=function(){return new Qh(this.key,this.H,this.left,this.right,null)};g.bc=function(){return this};g.De=function(a){return a.Ge(this)};g.replace=function(a,b,c,d){return new Rh(a,b,c,d,null)}; +g.Ge=function(a){return new Rh(a.key,a.H,this,a.right,null)};g.He=function(a){return new Rh(a.key,a.H,a.left,this,null)};g.Jc=function(a,b){return Vh(this,a,b)};g.V=function(a,b){return this.ka(null,b,null)};g.I=function(a,b,c){return this.ka(null,b,c)};g.$=function(a,b){if(0===b)return this.key;if(1===b)return this.H;throw Error("Index out of bounds");};g.ka=function(a,b,c){return 0===b?this.key:1===b?this.H:c};g.dc=function(a,b,c){return(new R(null,2,5,T,[this.key,this.H],null)).dc(null,b,c)}; +g.P=function(){return null};g.W=function(){return 2};g.fd=function(){return this.key};g.gd=function(){return 
this.H};g.Ac=function(){return this.H};g.Bc=function(){return new R(null,1,5,T,[this.key],null)};g.U=function(){var a=this.w;return null!=a?a:this.w=a=Ad(this)};g.K=function(a,b){return $d(this,b)};g.oa=function(){return he};g.Fa=function(a,b){return Kd(this,b)};g.Ga=function(a,b,c){return Ld(this,b,c)};g.O=function(a,b,c){return K.l(new R(null,2,5,T,[this.key,this.H],null),b,c)}; +g.yc=function(a,b){return 0===b||1===b};g.S=function(){var a=this.key;return Tb(Tb(wd,this.H),a)};g.T=function(a,b){return tc(new R(null,2,5,T,[this.key,this.H],null),b)};g.X=function(a,b){return new R(null,3,5,T,[this.key,this.H,b],null)}; +g.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 2:return this.$(null,c);case 3:return this.ka(null,c,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.c=function(a,c){return this.$(null,c)};a.l=function(a,c,d){return this.ka(null,c,d)};return a}();g.apply=function(a,b){return this.call.apply(this,[this].concat(Gb(b)))};g.h=function(a){return this.$(null,a)};g.c=function(a,b){return this.ka(null,a,b)};Rh.prototype[Fb]=function(){return yd(this)}; +function Qh(a,b,c,d,e){this.key=a;this.H=b;this.left=c;this.right=d;this.w=e;this.m=32402207;this.J=0}g=Qh.prototype;g.lastIndexOf=function(){function a(a){return Xd(this,a,H(this))}var b=null;b=function(b,d){switch(arguments.length){case 1:return a.call(this,b);case 2:return Xd(this,b,d)}throw Error("Invalid arity: "+(arguments.length-1));};b.h=a;b.c=function(a,b){return Xd(this,a,b)};return b}(); +g.indexOf=function(){var a=null;a=function(a,c){switch(arguments.length){case 1:return Ud(this,a,0);case 2:return Ud(this,a,c)}throw Error("Invalid arity: "+(arguments.length-1));};a.h=function(a){return Ud(this,a,0)};a.c=function(a,c){return Ud(this,a,c)};return a}();g.Ee=function(a){return new Qh(this.key,this.H,this.left,a,null)};g.ud=function(){throw Error("red-black tree invariant violation");};g.bc=function(){return new Rh(this.key,this.H,this.left,this.right,null)}; 
+g.De=function(a){return new Qh(this.key,this.H,a,this.right,null)};g.replace=function(a,b,c,d){return new Qh(a,b,c,d,null)};g.Ge=function(a){return this.left instanceof Qh?new Qh(this.key,this.H,this.left.bc(),new Rh(a.key,a.H,this.right,a.right,null),null):this.right instanceof Qh?new Qh(this.right.key,this.right.H,new Rh(this.key,this.H,this.left,this.right.left,null),new Rh(a.key,a.H,this.right.right,a.right,null),null):new Rh(a.key,a.H,this,a.right,null)}; +g.He=function(a){return this.right instanceof Qh?new Qh(this.key,this.H,new Rh(a.key,a.H,a.left,this.left,null),this.right.bc(),null):this.left instanceof Qh?new Qh(this.left.key,this.left.H,new Rh(a.key,a.H,a.left,this.left.left,null),new Rh(this.key,this.H,this.left.right,this.right,null),null):new Rh(a.key,a.H,a.left,this,null)};g.Jc=function(a,b){return Vh(this,a,b)};g.V=function(a,b){return this.ka(null,b,null)};g.I=function(a,b,c){return this.ka(null,b,c)}; +g.$=function(a,b){if(0===b)return this.key;if(1===b)return this.H;throw Error("Index out of bounds");};g.ka=function(a,b,c){return 0===b?this.key:1===b?this.H:c};g.dc=function(a,b,c){return(new R(null,2,5,T,[this.key,this.H],null)).dc(null,b,c)};g.P=function(){return null};g.W=function(){return 2};g.fd=function(){return this.key};g.gd=function(){return this.H};g.Ac=function(){return this.H};g.Bc=function(){return new R(null,1,5,T,[this.key],null)}; +g.U=function(){var a=this.w;return null!=a?a:this.w=a=Ad(this)};g.K=function(a,b){return $d(this,b)};g.oa=function(){return he};g.Fa=function(a,b){return Kd(this,b)};g.Ga=function(a,b,c){return Ld(this,b,c)};g.O=function(a,b,c){return K.l(new R(null,2,5,T,[this.key,this.H],null),b,c)};g.yc=function(a,b){return 0===b||1===b};g.S=function(){var a=this.key;return Tb(Tb(wd,this.H),a)};g.T=function(a,b){return tc(new R(null,2,5,T,[this.key,this.H],null),b)}; +g.X=function(a,b){return new R(null,3,5,T,[this.key,this.H,b],null)};g.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 
2:return this.$(null,c);case 3:return this.ka(null,c,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.c=function(a,c){return this.$(null,c)};a.l=function(a,c,d){return this.ka(null,c,d)};return a}();g.apply=function(a,b){return this.call.apply(this,[this].concat(Gb(b)))};g.h=function(a){return this.$(null,a)}; +g.c=function(a,b){return this.ka(null,a,b)};Qh.prototype[Fb]=function(){return yd(this)}; +var Wh=function Wh(a,b,c,d,e){if(null==b)return new Qh(c,d,null,null,null);var h=function(){var d=b.key;return a.c?a.c(c,d):a.call(null,c,d)}();if(0===h)return e[0]=b,null;if(0>h)return h=function(){var h=b.left;return Wh.Z?Wh.Z(a,h,c,d,e):Wh.call(null,a,h,c,d,e)}(),null!=h?b.De(h):null;h=function(){var h=b.right;return Wh.Z?Wh.Z(a,h,c,d,e):Wh.call(null,a,h,c,d,e)}();return null!=h?b.Ee(h):null},Xh=function Xh(a,b){if(null==a)return b;if(null==b)return a;if(a instanceof Qh){if(b instanceof Qh){var d= +function(){var d=a.right,f=b.left;return Xh.c?Xh.c(d,f):Xh.call(null,d,f)}();return d instanceof Qh?new Qh(d.key,d.H,new Qh(a.key,a.H,a.left,d.left,null),new Qh(b.key,b.H,d.right,b.right,null),null):new Qh(a.key,a.H,a.left,new Qh(b.key,b.H,d,b.right,null),null)}return new Qh(a.key,a.H,a.left,function(){var d=a.right;return Xh.c?Xh.c(d,b):Xh.call(null,d,b)}(),null)}if(b instanceof Qh)return new Qh(b.key,b.H,function(){var d=b.left;return Xh.c?Xh.c(a,d):Xh.call(null,a,d)}(),b.right,null);d=function(){var d= +a.right,f=b.left;return Xh.c?Xh.c(d,f):Xh.call(null,d,f)}();return d instanceof Qh?new Qh(d.key,d.H,new Rh(a.key,a.H,a.left,d.left,null),new Rh(b.key,b.H,d.right,b.right,null),null):Th(a.key,a.H,a.left,new Rh(b.key,b.H,d,b.right,null))},Yh=function Yh(a,b,c,d){if(null!=b){var f=function(){var d=b.key;return a.c?a.c(c,d):a.call(null,c,d)}();if(0===f)return d[0]=b,Xh(b.left,b.right);if(0>f)return f=function(){var f=b.left;return Yh.M?Yh.M(a,f,c,d):Yh.call(null,a,f,c,d)}(),null!=f||null!=d[0]?b.left instanceof +Rh?Th(b.key,b.H,f,b.right):new 
Qh(b.key,b.H,f,b.right,null):null;f=function(){var f=b.right;return Yh.M?Yh.M(a,f,c,d):Yh.call(null,a,f,c,d)}();return null!=f||null!=d[0]?b.right instanceof Rh?Uh(b.key,b.H,b.left,f):new Qh(b.key,b.H,b.left,f,null):null}return null},Zh=function Zh(a,b,c,d){var f=b.key,h=a.c?a.c(c,f):a.call(null,c,f);return 0===h?b.replace(f,d,b.left,b.right):0>h?b.replace(f,b.H,function(){var f=b.left;return Zh.M?Zh.M(a,f,c,d):Zh.call(null,a,f,c,d)}(),b.right):b.replace(f,b.H,b.left, +function(){var f=b.right;return Zh.M?Zh.M(a,f,c,d):Zh.call(null,a,f,c,d)}())};function $h(a,b,c,d,e){this.Bb=a;this.mc=b;this.F=c;this.meta=d;this.w=e;this.m=418776847;this.J=8192}g=$h.prototype;g.forEach=function(a){for(var b=E(this),c=null,d=0,e=0;;)if(ed?c.left:c.right}else return null}g.has=function(a){return He(this,a)};g.V=function(a,b){return this.I(null,b,null)}; +g.I=function(a,b,c){a=ai(this,b);return null!=a?a.H:c};g.Qc=function(a,b,c){return null!=this.mc?Jd(Vh(this.mc,b,c)):c};g.P=function(){return this.meta};g.W=function(){return this.F};g.Rc=function(){return 0(a.h?a.h(c):a.call(null,c))?b:c};Ai.A=function(a,b,c,d){return Mb(function(b,c){return Ai.l(a,b,c)},Ai.l(a,b,c),d)};Ai.N=function(a){var b=y(a),c=z(a);a=y(c);var d=z(c);c=y(d);d=z(d);return Ai.A(b,a,c,d)};Ai.L=3;function Bi(a,b){return new kf(null,function(){var c=E(b);if(c){var d=y(c);d=a.h?a.h(d):a.call(null,d);c=t(d)?ae(y(c),Bi(a,vd(c))):null}else c=null;return c},null,null)}function Di(a,b,c){this.i=a;this.end=b;this.step=c} +Di.prototype.ja=function(){return 0this.end};Di.prototype.next=function(){var a=this.i;this.i+=this.step;return a};function Ei(a,b,c,d,e){this.meta=a;this.start=b;this.end=c;this.step=d;this.w=e;this.m=32375006;this.J=139264}g=Ei.prototype;g.toString=function(){return fd(this)};g.equiv=function(a){return this.K(null,a)}; +g.indexOf=function(){var a=null;a=function(a,c){switch(arguments.length){case 1:return Ud(this,a,0);case 2:return Ud(this,a,c)}throw Error("Invalid arity: 
"+(arguments.length-1));};a.h=function(a){return Ud(this,a,0)};a.c=function(a,c){return Ud(this,a,c)};return a}(); +g.lastIndexOf=function(){function a(a){return Xd(this,a,H(this))}var b=null;b=function(b,d){switch(arguments.length){case 1:return a.call(this,b);case 2:return Xd(this,b,d)}throw Error("Invalid arity: "+(arguments.length-1));};b.h=a;b.c=function(a,b){return Xd(this,a,b)};return b}();g.$=function(a,b){if(0<=b&&bthis.end&&0===this.step)return this.start;throw Error("Index out of bounds");}; +g.ka=function(a,b,c){return 0<=b&&bthis.end&&0===this.step?this.start:c};g.ba=function(){return new Di(this.start,this.end,this.step)};g.P=function(){return this.meta};g.Ka=function(){return 0this.end?new Ei(this.meta,this.start+this.step,this.end,this.step,null):null}; +g.W=function(){return wb(this.S(null))?0:Math.ceil((this.end-this.start)/this.step)};g.U=function(){var a=this.w;return null!=a?a:this.w=a=Ad(this)};g.K=function(a,b){return $d(this,b)};g.oa=function(){return tc(wd,this.meta)};g.Fa=function(a,b){return Kd(this,b)};g.Ga=function(a,b,c){for(a=this.start;;)if(0this.end){c=b.c?b.c(c,a):b.call(null,c,a);if(Hd(c))return B(c);a+=this.step}else return c};g.Ia=function(){return null==this.S(null)?null:this.start}; +g.bb=function(){return null!=this.S(null)?new Ei(this.meta,this.start+this.step,this.end,this.step,null):wd};g.S=function(){return 0this.step?this.start>this.end?this:null:this.start===this.end?null:this};g.T=function(a,b){return new Ei(b,this.start,this.end,this.step,this.w)};g.X=function(a,b){return ae(b,this)};Ei.prototype[Fb]=function(){return yd(this)};function Fi(a,b,c){return new Ei(null,a,b,c,null)} +function Gi(a,b){return new R(null,2,5,T,[Bi(a,b),ng(a,b)],null)} +function Hi(a){var b=y;return function(){function c(c,d,e){return new R(null,2,5,T,[b.l?b.l(c,d,e):b.call(null,c,d,e),a.l?a.l(c,d,e):a.call(null,c,d,e)],null)}function d(c,d){return new R(null,2,5,T,[b.c?b.c(c,d):b.call(null,c,d),a.c?a.c(c,d):a.call(null,c,d)],null)}function 
e(c){return new R(null,2,5,T,[b.h?b.h(c):b.call(null,c),a.h?a.h(c):a.call(null,c)],null)}function f(){return new R(null,2,5,T,[b.B?b.B():b.call(null),a.B?a.B():a.call(null)],null)}var h=null,k=function(){function c(a,b,c,e){var f= +null;if(3lb)return Jc(a,"#");Jc(a,c);if(0===tb.h(f))E(h)&&Jc(a,function(){var a=Ki.h(f);return t(a)?a:"..."}());else{if(E(h)){var l=y(h);b.l?b.l(l,a,f):b.call(null,l,a,f)}for(var p=z(h),m=tb.h(f)-1;;)if(!p||null!=m&&0===m){E(p)&&0===m&&(Jc(a,d),Jc(a,function(){var a=Ki.h(f);return t(a)?a:"..."}()));break}else{Jc(a,d);var u=y(p);c=a;h=f;b.l?b.l(u,c,h):b.call(null,u,c,h);var w=z(p);c=m-1;p=w;m=c}}return Jc(a,e)}finally{lb=k}} +function Li(a,b){for(var c=E(b),d=null,e=0,f=0;;)if(fH(a)?a.toUpperCase():[v.h(a.substring(0,1).toUpperCase()),v.h(a.substring(1))].join("")} +function Qo(a){if("string"===typeof a)return a;a=jf(a);var b=Fo(a,/-/),c=E(b);b=y(c);c=z(c);return t(Oo.h?Oo.h(b):Oo.call(null,b))?a:Kb(v,b,ig.c(Po,c))}function Ro(a){var b=function(){var b=function(){var b=me(a);return b?(b=a.displayName,t(b)?b:a.name):b}();if(t(b))return b;b=function(){var b=null!=a?a.J&4096||q===a.Oe?!0:!1:!1;return b?jf(a):b}();if(t(b))return b;b=qe(a);return xe(b)?Tk.h(b):null}();return Do(""+v.h(b),"$",".")}var So=!1;if("undefined"===typeof To)var To=0;function Uo(a){return setTimeout(a,16)}var Vo="undefined"===typeof window||null==window.document?Uo:function(){var a=window,b=a.requestAnimationFrame;if(t(b))return b;b=a.webkitRequestAnimationFrame;if(t(b))return b;b=a.mozRequestAnimationFrame;if(t(b))return b;a=a.msRequestAnimationFrame;return t(a)?a:Uo}();function Wo(a,b){return a.cljsMountOrder-b.cljsMountOrder}if("undefined"===typeof Xo)var Xo=function(){return null};function Yo(a){this.Yd=a} +function Zo(a,b){var c=a[b];if(null==c)return null;a[b]=null;for(var d=c.length,e=0;;)if(e=d&&a.push(gq(c));return a}}(e),[b,c],a))}};if("undefined"===typeof jq)var jq=null;function kq(){if(null!=jq)return jq;if("undefined"!==typeof ReactDOM)return 
jq=ReactDOM;if("undefined"!==typeof require){var a=jq=require("react-dom");if(t(a))return a;throw Error("require('react-dom') failed");}throw Error("js/ReactDOM is missing");}if("undefined"===typeof lq)var lq=dg.h(Ef); +function mq(a,b,c){var d=So;So=!0;try{return kq().render(a.B?a.B():a.call(null),b,function(){return function(){var d=So;So=!1;try{return gg.M(lq,K,b,new R(null,2,5,T,[a,b],null)),Zo(bp,"afterRender"),null!=c?c.B?c.B():c.call(null):null}finally{So=d}}}(d))}finally{So=d}}function nq(a,b){return mq(a,b,null)}function oq(a,b,c){qp();return mq(function(){return gq(me(a)?a.B?a.B():a.call(null):a)},b,c)}Wp=function(a){return kq().findDOMNode(a)};function pq(a){switch(arguments.length){case 2:return oq(arguments[0],arguments[1],null);case 3:return oq(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",v.h(arguments.length)].join(""));}}function qq(a,b){return oq(a,b,null)} +da("reagent.core.force_update_all",function(){qp();qp();for(var a=E(mh(B(lq))),b=null,c=0,d=0;;)if(d=Number(c)?a:a=-1Number(a)?"-":0<=b.indexOf("+")?"+":0<=b.indexOf(" ")?" 
":"";0<=Number(a)&&(d=f+d);if(isNaN(c)||d.length>=Number(c))return d;d=isNaN(e)?Math.abs(Number(a)).toString():Math.abs(Number(a)).toFixed(e);a=Number(c)-d.length-f.length;0<=b.indexOf("-",0)?d=f+d+sa(" ",a):(b=0<=b.indexOf("0",0)?"0":" ",d=f+sa(b,a)+d);return d};yq.fc.d=function(a,b,c,d,e,f,h,k){return yq.fc.f(parseInt(a,10),b,c,d,0,f,h,k)}; +yq.fc.i=yq.fc.d;yq.fc.u=yq.fc.d;function zq(a){var b=be([Vk,null]);return wg.c(t(a)?a:Ef,function(){return function e(a){return new kf(null,function(){for(var b=a;;)if(b=E(b)){if(Ae(b)){var d=Wc(b),k=H(d),l=of(k);a:for(var p=0;;)if(p=H(h)&&Vf(function(){return function(a){return!(a instanceof Xq)}}(b,c,d,e,f,h),h)))throw Error(Bq("%s is not a valid sequence schema; %s%s%s",be([a,"a valid sequence schema consists of zero or more `one` elements, ","followed by zero or more `optional` elements, followed by an optional ", +"schema that will match the remaining elements."])));return new R(null,2,5,T,[O.c(c,f),y(h)],null)} +R.prototype.xb=function(){var a=this,b=Zq(a),c=J(b,0,null),d=J(b,1,null);return Wg(O.c(function(){return function(a,b,c,d){return function m(e){return new kf(null,function(){return function(){for(;;){var a=E(e);if(a){if(Ae(a)){var b=Wc(a),c=H(b),d=of(c);return function(){for(var a=0;;)if(ac?f:c;return $r(a,ea?0:a}():function(){var a=e-b;return f>a?f:a}())} +function gs(a,b){var c=null!=a&&(a.m&64||q===a.G)?P(U,a):a,d=D.c(c,pl);d=null!=d&&(d.m&64||q===d.G)?P(U,d):d;var e=D.c(d,Aj),f=D.c(c,Yj),h=D.c(c,no);return $r(c,e>f?function(){var a=h-1,c=e+b;return a=a}}(l,p,a,c,c,d,e,f,h,k),h),l,p);return Zr(c,d)} +function it(a,b){var c=null!=a&&(a.m&64||q===a.G)?P(U,a):a,d=D.c(c,pl),e=null!=d&&(d.m&64||q===d.G)?P(U,d):d,f=D.c(e,zn),h=D.c(c,tk),k=D.c(c,fl),l=b-1;d=J(cf(Bi(function(a,b,c,d,e,f,h){return function(a){return h>a}}(l,a,c,c,d,e,f,h,k),h)),l,0);return Zr(c,d)}function jt(a){return K.l(a,im,Ve)}function kt(a){return K.l(a,im,Hr)}function lt(a,b,c){return K.l(a,b,c)}function mt(a,b,c){return Wg(O.A(jg(b,a),new 
R(null,1,5,T,[c],null),be([jg(H(a)-b-1,kg(b,a))])))} +function nt(a,b){var c=null!=a&&(a.m&64||q===a.G)?P(U,a):a,d=D.c(c,pl),e=null!=d&&(d.m&64||q===d.G)?P(U,d):d;d=D.c(e,zn);e=D.c(e,Aj);var f=D.c(c,fl);D.c(c,no);var h=D.c(c,Oj),k=D.c(c,Rj),l=D.c(c,$l),p=D.c(c,im);p=95b?p.h?p.h(b):p.call(null,b):b;h=tr(p,h);return G.c(f,d+1)?t(k)?K.l(Yr(zg(c,new R(null,3,5,T,[il,e,d],null),h),d+1),vk,!0):zg(c,new R(null,3,5,T,[il,e,d],null),h):Yr(Ag.Z(c,new R(null,2,5,T,[il,e],null),t(l)?mt:lt,d,h),d+1)} +function ot(a,b){var c=null!=a&&(a.m&64||q===a.G)?P(U,a):a,d=D.c(c,Rj),e=D.c(c,vk);t(t(d)?e:d)&&(c=null!=c&&(c.m&64||q===c.G)?P(U,c):c,d=D.c(c,pl),d=null!=d&&(d.m&64||q===d.G)?P(U,d):d,d=D.c(d,Aj),e=D.c(c,no),c=Yr(c,0),c=G.c(e,d+1)?Tr.h(c):$r(c,d+1));return c=nt(c,b)}function pt(a){a=null!=a&&(a.m&64||q===a.G)?P(U,a):a;var b=D.c(a,fl),c=D.c(a,no);return K.l(a,il,Wg(qg(c,Wg(qg(b,new R(null,2,5,T,[69,Ef],null))))))} +function qt(a){a=null!=a&&(a.m&64||q===a.G)?P(U,a):a;var b=D.c(a,pl);b=null!=b&&(b.m&64||q===b.G)?P(U,b):b;b=D.c(b,Aj);var c=D.c(a,fl),d=D.c(a,Oj);return zg(a,new R(null,2,5,T,[il,b],null),gr.c(c,d))}function rt(a,b,c){return Wg(O.c(jg(b,a),qg(H(a)-b,vr(c))))}function st(a,b,c){return Wg(O.c(qg(b+1,vr(c)),kg(b+1,a)))} +function tt(a){a=null!=a&&(a.m&64||q===a.G)?P(U,a):a;var b=D.c(a,pl),c=null!=b&&(b.m&64||q===b.G)?P(U,b):b;b=D.c(c,zn);c=D.c(c,Aj);var d=D.c(a,fl),e=D.c(a,Oj);--d;return Ag.Z(a,new R(null,2,5,T,[il,c],null),rt,b=k?Zr(c,k-1):c,m=Mb(D,p,new R(null,2,5,T,[pl,zn],null));return Ag.l(p,new R(null,2,5,T,[il,h],null),function(a,b,c,d,e,f,h,k,m,l,p,Q){return function(a){return Wg(O.A(jg(b,a),kg(b+c,a),be([qg(c,vr(Q))])))}}(p,m,function(){var a=k-m;return b=a}}(c,b)(b)}()))return Gu(a,b+64);throw Jt;}catch(h){if(h instanceof Error){var d=h;if(d===Jt)try{if(55===b)return Bg(a,V,ms);throw Jt;}catch(k){if(k instanceof Error){var e=k;if(e===Jt)try{if(56===b)return Bg(a,V,ns);throw Jt;}catch(l){if(l instanceof Error){var f=l;if(f===Jt)try{if(99===b)return du(a); +throw 
Jt;}catch(p){if(p instanceof Error){d=p;if(d===Jt)throw Jt;throw d;}throw p;}else throw f;}else throw l;}else throw e;}else throw k;}else throw d;}else throw h;}else throw Jt;}catch(h){if(h instanceof Error)if(d=h,d===Jt)try{if(35===c)try{if(56===b)return Bg(a,V,pt);throw Jt;}catch(k){if(k instanceof Error){e=k;if(e===Jt)throw Jt;throw e;}throw k;}else throw Jt;}catch(k){if(k instanceof Error)if(e=k,e===Jt)try{if(40===c)try{if(48===b)return Zt(a);throw Jt;}catch(l){if(l instanceof Error){f= +l;if(f===Jt)return $t(a);throw f;}throw l;}else throw Jt;}catch(l){if(l instanceof Error){f=l;if(f===Jt)return a;throw f;}throw l;}else throw e;else throw k;}else throw d;else throw h;}},function(a){return a},function(a){return a},Gu,function(a,b){return Cg(a,V,ot,b)},function(a,b){var c=function(){switch(b){case 64:return eu;case 65:return fu;case 66:return gu;case 67:return hu;case 68:return iu;case 69:return ju;case 70:return ku;case 71:return lu;case 72:return mu;case 73:return nu;case 74:return ou; +case 75:return pu;case 76:return su;case 77:return tu;case 80:return uu;case 83:return qu;case 84:return ru;case 87:return vu;case 88:return wu;case 90:return xu;case 96:return lu;case 97:return hu;case 100:return Du;case 101:return fu;case 102:return mu;case 103:return yu;case 104:return zu;case 108:return Au;case 109:return Cu;case 112:return Eu;case 114:return Fu;default:return null}}();return t(c)?c.h?c.h(a):c.call(null,a):a},function(a){return a},function(a,b){return K.l(a,kk,ge.c(kk.h(a),b))}, +function(a){return a},function(a,b){return K.l(a,rk,ge.c(rk.h(a),b))},function(a){return a},function(a){return a},function(a){return K.A(a,rk,he,be([kk,he]))}]);function Iu(a,b){for(var c=a,d=Tl.h(c),e=b;;){var f=y(e);if(t(f)){var h=160<=f?65:f;h=D.c(d.h?d.h(xq):d.call(null,xq),h);d=J(h,0,null);h=J(h,1,null);a:for(;;)if(E(h)){var k=y(h);k=Hu.h?Hu.h(k):Hu.call(null,k);c=k.c?k.c(c,f):k.call(null,c,f);h=z(h)}else break a;e=vd(e)}else return K.l(c,Tl,d)}} +function Ju(a,b){var 
c=xg(function(a){return a.codePointAt(0)},b);return Iu(a,c)} +function Ku(a,b){try{if(ze(b)&&3===H(b)){var c=Vd(b,0),d=Vd(b,1),e=Vd(b,2);return[v.h(a+8),";2;",v.h(c),";",v.h(d),";",v.h(e)].join("")}throw Jt;}catch(k){if(k instanceof Error){var f=k;if(f===Jt)try{if(t(function(){return function(){return function(a){return 8>a}}(f)(b)}()))return""+v.h(a+b);throw Jt;}catch(l){if(l instanceof Error){var h=l;if(h===Jt)try{if(t(function(){return function(){return function(a){return 16>a}}(h,f)(b)}()))return""+v.h(a+52+b);throw Jt;}catch(p){if(p instanceof Error){c= +p;if(c===Jt)return[v.h(a+8),";5;",v.h(b)].join("");throw c;}throw p;}else throw h;}else throw l;}else throw f;}else throw k;}}ag.c(Ku,30);ag.c(Ku,40);var Lu=function Lu(a){if(null!=a&&null!=a.yd)return a.yd(a);var c=Lu[n(null==a?null:a)];if(null!=c)return c.h?c.h(a):c.call(null,a);c=Lu._;if(null!=c)return c.h?c.h(a):c.call(null,a);throw Cb("Screen.lines",a);},Mu=function Mu(a){if(null!=a&&null!=a.xd)return a.xd(a);var c=Mu[n(null==a?null:a)];if(null!=c)return c.h?c.h(a):c.call(null,a);c=Mu._;if(null!=c)return c.h?c.h(a):c.call(null,a);throw Cb("Screen.cursor",a);};function Nu(a,b){var c=0parseFloat(Iv)){Hv=String(Kv);break a}}Hv=Iv}var gb={}; +function Lv(a){return fb(a,function(){for(var b=0,c=ra(String(Hv)).split("."),d=ra(String(a)).split("."),e=Math.max(c.length,d.length),f=0;0==b&&f=a.keyCode)a.keyCode=-1}catch(b){}};var Uv="closure_listenable_"+(1E6*Math.random()|0),Vv=0;function Wv(a,b,c,d,e){this.listener=a;this.Xd=null;this.src=b;this.type=c;this.capture=!!d;this.Ub=e;this.key=++Vv;this.$c=this.Fd=!1}function Xv(a){a.$c=!0;a.listener=null;a.Xd=null;a.src=null;a.Ub=null};function Yv(a){this.src=a;this.rb={};this.wd=0}Yv.prototype.add=function(a,b,c,d,e){var f=a.toString();a=this.rb[f];a||(a=this.rb[f]=[],this.wd++);var h=Zv(a,b,d,e);-1e.keyCode||void 0!=e.returnValue)){a:{var f=!1;if(0==e.keyCode)try{e.keyCode=-1;break a}catch(l){f=!0}if(f||void 
0==e.returnValue)e.returnValue=!0}e=[];for(f=c.currentTarget;f;f=f.parentNode)e.push(f);f=a.type;for(var h=e.length-1;!c.Kc&&0<=h;h--){c.currentTarget=e[h];var k=nw(e[h],f,!0,c);d=d&&k}for(h=0;!c.Kc&& +h>>0);function fw(a){if(ha(a))return a;a[pw]||(a[pw]=function(b){return a.handleEvent(b)});return a[pw]};function qw(){wv.call(this);this.Ib=new Yv(this);this.ff=this;this.ve=null}qa(qw,wv);qw.prototype[Uv]=!0;g=qw.prototype;g.addEventListener=function(a,b,c,d){dw(this,a,b,c,d)};g.removeEventListener=function(a,b,c,d){lw(this,a,b,c,d)}; +g.dispatchEvent=function(a){var b,c=this.ve;if(c)for(b=[];c;c=c.ve)b.push(c);c=this.ff;var d=a.type||a;if(ca(a))a=new Sv(a,c);else if(a instanceof Sv)a.target=a.target||c;else{var e=a;a=new Sv(d,c);Ia(a,e)}e=!0;if(b)for(var f=b.length-1;!a.Kc&&0<=f;f--){var h=a.currentTarget=b[f];e=rw(h,d,!0,a)&&e}a.Kc||(h=a.currentTarget=c,e=rw(h,d,!0,a)&&e,a.Kc||(e=rw(h,d,!1,a)&&e));if(b)for(f=0;!a.Kc&&fthis.head?(Yw(this.o,this.fa,a,0,this.o.length-this.fa),Yw(this.o,0,a,this.o.length-this.fa,this.head),this.fa=0,this.head=this.length,this.o=a):this.fa===this.head?(this.head=this.fa=0,this.o=a):null};function ax(a,b){for(var c=a.length,d=0;;)if(da)){a+=1;continue}break}hx=!1;return 0c)return a;a:for(;;){var e=cMath.random()&&15>d)d+=1;else break a;if(d>this.level){for(var e=this.level+1;;)if(e<=d+1)c[e]=this.header,e+=1;else break;this.level=d}for(d=Ex(a,b,Array(d));;)return 0<=this.level?(c=c[0].forward,d.forward[0]=c[0],c[0]=d):null}; +Gx.prototype.remove=function(a){var b=Array(15),c=Fx(this.header,a,this.level,b);c=0===c.forward.length?null:c.forward[0];if(null!=c&&c.key===a){for(a=0;;)if(a<=this.level){var d=b[a].forward;c===(ad)return c===b.header?null:c;var e;a:for(e=c;;){e=d=a)break a}null!=e?(--d,c=e):--d}}Gx.prototype.S=function(){return function(a){return function d(c){return new kf(null,function(){return function(){return null==c?null:ae(new 
R(null,2,5,T,[c.key,c.H],null),d(c.forward[0]))}}(a),null,null)}}(this)(this.header.forward[0])}; +Gx.prototype.R=function(a,b,c){return Y(b,function(){return function(a){return Y(b,Qi,""," ","",c,a)}}(this),"{",", ","}",c,this)};var Ix=new Gx(Ex(null,null,0),0);function Jx(a){var b=(new Date).valueOf()+a,c=Hx(b),d=t(t(c)?c.keya:b)?a+8:a,[v.h(c),v.h(a)].join("")):null} +function Vy(a){var b=J(a,0,null),c=J(a,1,null);a=J(a,2,null);return["rgb(",v.h(b),",",v.h(c),",",v.h(a),")"].join("")} +var Wy=hj(function(a){a=null!=a&&(a.m&64||q===a.G)?P(U,a):a;var b=D.c(a,Nk),c=D.c(a,pl);a=K.l(a,Nk,t(c)?wb(b):b);var d=null!=a&&(a.m&64||q===a.G)?P(U,a):a,e=D.c(d,Ok),f=D.c(d,Tn);b=D.c(d,Kj);var h=D.c(d,dk);c=D.c(d,Vl);var k=D.c(d,Nk),l=D.c(d,Yn);d=D.c(d,pl);var p=t(k)?t(e)?e:"fg":f;e=Uy(t(k)?t(f)?f:"bg":e,b,"fg-");h=Uy(p,h,"bg-");c=vg(ub,new R(null,6,5,T,[e,h,t(b)?"bright":null,t(l)?"italic":null,t(c)?"underline":null,t(d)?"cursor":null],null));if(E(c))a:for(b=new cb,c=E(c);;)if(null!=c)b.append(""+ +v.h(y(c))),c=z(c),null!=c&&b.append(" ");else{b=b.toString();break a}else b=null;l=null!=a&&(a.m&64||q===a.G)?P(U,a):a;a=D.c(l,Ok);c=D.c(l,Tn);h=D.c(l,Nk);l=t(h)?c:a;a=t(h)?a:c;a=hi.A(be([t(ze.h?ze.h(l):ze.call(null,l))?new r(null,1,[ik,Vy(l)],null):null,t(ze.h?ze.h(a):ze.call(null,a))?new r(null,1,[al,Vy(a)],null):null]));return hi.A(be([t(b)?new r(null,1,[vn,b],null):null,t(a)?new r(null,1,[fm,a],null):null]))}); +function Xy(a,b){var c=J(a,0,null),d=J(a,1,null);d=Bg(d,pl,function(){return function(a){return t(a)?B(b):a}}(a,c,d));return new R(null,3,5,T,[ro,Wy.h?Wy.h(d):Wy.call(null,d),c],null)}function Yy(a,b){var c=J(a,0,null),d=J(a,1,null),e=jg(b,c);e=E(e)?new R(null,2,5,T,[Eo(e),d],null):null;var f=K.l(d,pl,!0);f=new R(null,2,5,T,[Vd(c,b),f],null);c=kg(b+1,c);d=E(c)?new R(null,2,5,T,[Eo(c),d],null):null;return vg(ub,new R(null,3,5,T,[e,f,d],null))} +function Zy(a,b){for(var c=he,d=a,e=b;;)if(E(d)){var f=y(d),h=J(f,0,null);J(f,1,null);h=H(h);if(h<=e)c=ge.c(c,f),d=vd(d),e-=h;else 
return O.A(c,Yy(f,e),be([vd(d)]))}else return c}function $y(a,b,c){a=t(B(b))?Zy(B(a),B(b)):B(a);return new R(null,2,5,T,[Lm,Ii(bg(function(){return function(a,b){return pe(new R(null,3,5,T,[Xy,b,c],null),new r(null,1,[mk,a],null))}}(a),a))],null)}var qA=new ti(null,new r(null,3,["small",null,"medium",null,"big",null],null),null); +function rA(a,b,c,d,e){var f=yp(function(){var a=B(c);return t(qA.h?qA.h(a):qA.call(null,a))?["font-",v.h(a)].join(""):null}),h=yp(function(){return function(){var d=B(a),e=B(b),f=B(c);f=t(qA.h?qA.h(f):qA.call(null,f))?null:new r(null,1,[wk,f],null);return hi.A(be([new r(null,2,[fl,[v.h(d),"ch"].join(""),no,[v.h(1.3333333333*e),"em"].join("")],null),f]))}}(f)),k=yp(function(){return function(){return Lu(B(d))}}(f,h)),l=yp(function(a,c,d){return function(){return xg(function(a,b,c){return function(d){return yp(function(a, +b,c){return function(){return D.c(B(c),d)}}(a,b,c))}}(a,c,d),Fi(0,B(b),1))}}(f,h,k)),p=yp(function(){return function(){return Mu(B(d))}}(f,h,k,l)),m=yp(function(a,b,c,d,e){return function(){return zn.h(B(e))}}(f,h,k,l,p)),u=yp(function(a,b,c,d,e){return function(){return Aj.h(B(e))}}(f,h,k,l,p,m)),w=yp(function(a,b,c,d,e){return function(){return On.h(B(e))}}(f,h,k,l,p,m,u));return function(a,b,c,d,f,h,k,l){return function(){return new R(null,3,5,T,[Gm,new r(null,2,[vn,B(a),fm,B(b)],null),bg(function(a, +b,c,d,f,h,k,l){return function(m,p){var u=yp(function(a,b,c,d,e,f,h,k){return function(){var a=B(k);return t(a)?(a=G.c(m,B(h)))?B(f):a:a}}(a,b,c,d,f,h,k,l));return pe(new R(null,4,5,T,[$y,p,u,e],null),new r(null,1,[mk,m],null))}}(a,b,c,d,f,h,k,l),B(d))],null)}}(f,h,k,l,p,m,u,w)} +function sA(){return new R(null,2,5,T,[Ym,new r(null,4,[Mn,"1.1",Fl,"0 0 866.0254037844387 866.0254037844387",vn,"icon",mo,new r(null,1,[An,'\x3cdefs\x3e \x3cmask id\x3d"small-triangle-mask"\x3e \x3crect width\x3d"100%" height\x3d"100%" fill\x3d"white"/\x3e \x3cpolygon points\x3d"508.01270189221935 433.01270189221935, 208.0127018922194 
259.8076211353316, 208.01270189221927 606.217782649107" fill\x3d"black"\x3e\x3c/polygon\x3e \x3c/mask\x3e \x3c/defs\x3e \x3cpolygon points\x3d"808.0127018922194 433.01270189221935, 58.01270189221947 -1.1368683772161603e-13, 58.01270189221913 866.0254037844386" mask\x3d"url(#small-triangle-mask)" fill\x3d"white"\x3e\x3c/polygon\x3e \x3cpolyline points\x3d"481.2177826491071 333.0127018922194, 134.80762113533166 533.0127018922194" stroke\x3d"white" stroke-width\x3d"90"\x3e\x3c/polyline\x3e'],null)], +null)],null)}function tA(){return new R(null,3,5,T,[Ym,new r(null,3,[Mn,"1.1",Fl,"0 0 12 12",vn,"icon"],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M1,0 L11,6 L1,12 Z"],null)],null)],null)}function uA(){return new R(null,4,5,T,[Ym,new r(null,3,[Mn,"1.1",Fl,"0 0 12 12",vn,"icon"],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M1,0 L4,0 L4,12 L1,12 Z"],null)],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M8,0 L11,0 L11,12 L8,12 Z"],null)],null)],null)} +function vA(){return new R(null,4,5,T,[Ym,new r(null,3,[Mn,"1.1",Fl,"0 0 12 12",vn,"icon"],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M12,0 L7,0 L9,2 L7,4 L8,5 L10,3 L12,5 Z"],null)],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M0,12 L0,7 L2,9 L4,7 L5,8 L3,10 L5,12 Z"],null)],null)],null)} +function wA(){return new R(null,4,5,T,[Ym,new r(null,3,[Mn,"1.1",Fl,"0 0 12 12",vn,"icon"],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M7,5 L7,0 L9,2 L11,0 L12,1 L10,3 L12,5 Z"],null)],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M5,7 L0,7 L2,9 L0,11 L1,12 L3,10 L5,12 Z"],null)],null)],null)}function xA(a,b){return function(b){return function(){return new R(null,3,5,T,[cl,new r(null,1,[Sl,b],null),new R(null,1,5,T,[t(B(a))?uA:tA],null)],null)}}(Ty(b,new fy(null,null,null)))} +function yA(a){return 10>a?["0",v.h(a)].join(""):a}function zA(a){var b=Math.floor((a%60+60)%60);return[v.h(yA(Math.floor(a/60))),":",v.h(yA(b))].join("")}function AA(a,b){var c=T,d=new R(null,2,5,T,[Yk,zA(B(a))],null),e=T;var f=B(a);var 
h=B(b);f=["-",v.h(zA(h-f))].join("");return new R(null,3,5,c,[Ml,d,new R(null,2,5,e,[co,f],null)],null)} +function BA(){function a(a){a.preventDefault();return Ry(a.currentTarget.parentNode.parentNode.parentNode)}return function(){return new R(null,4,5,T,[un,new r(null,1,[Sl,a],null),new R(null,1,5,T,[vA],null),new R(null,1,5,T,[wA],null)],null)}} +function CA(a,b){var c=Sy(b,function(a){var b=a.currentTarget.offsetWidth,c=a.currentTarget.getBoundingClientRect();return cy(Nu(a.clientX-c.left,b)/b)}),d=yp(function(){return function(){return[v.h(100*B(a)),"%"].join("")}}(c));return function(a,b){return function(){return new R(null,2,5,T,[Vj,new R(null,3,5,T,[Bl,new r(null,1,[Ql,a],null),new R(null,2,5,T,[Cj,new R(null,2,5,T,[ro,new r(null,1,[fm,new r(null,1,[fl,B(b)],null)],null)],null)],null)],null)],null)}}(c,d)} +function DA(a,b,c,d){return function(e){return function(){return new R(null,5,5,T,[Kk,new R(null,3,5,T,[xA,a,d],null),new R(null,3,5,T,[AA,b,c],null),new R(null,1,5,T,[BA],null),new R(null,3,5,T,[CA,e,d],null)],null)}}(yp(function(){return B(b)/B(c)}))} +function EA(a){return function(a){return function(){return new R(null,3,5,T,[ol,new r(null,1,[Sl,a],null),new R(null,2,5,T,[Xk,new R(null,2,5,T,[km,new R(null,2,5,T,[ro,new R(null,1,5,T,[sA],null)],null)],null)],null)],null)}}(Ty(a,new fy(null,null,null)))}function FA(){return new R(null,2,5,T,[Ek,new R(null,1,5,T,[xn],null)],null)}function GA(a){return Wf(function(b){return a[b]},new R(null,4,5,T,["altKey","shiftKey","metaKey","ctrlKey"],null))} +function HA(a){var b=t(GA(a))?null:function(){switch(a.key){case " ":return new fy(null,null,null);case "f":return bm;case "0":return cy(0);case "1":return cy(.1);case "2":return cy(.2);case "3":return cy(.3);case "4":return cy(.4);case "5":return cy(.5);case "6":return cy(.6);case "7":return cy(.7);case "8":return cy(.8);case "9":return cy(.9);default:return null}}();if(t(b))return b;switch(a.key){case "\x3e":return new ey(null,null,null);case "\x3c":return new 
dy(null,null,null);default:return null}} +function IA(a){if(t(GA(a)))return null;switch(a.which){case 37:return new ay(null,null,null);case 39:return new $x(null,null,null);default:return null}}function JA(a){var b=HA(a);return t(b)?(a.preventDefault(),G.c(b,bm)?(Ry(a.currentTarget),null):b):null}function KA(a){var b=IA(a);return t(b)?(a.preventDefault(),b):null} +function LA(a,b,c,d){a=t(a)?['"',v.h(a),'"'].join(""):"untitled";return new R(null,4,5,T,[dl,t(d)?new R(null,2,5,T,[jo,new r(null,1,[zl,d],null)],null):null,a,t(b)?new R(null,3,5,T,[ro," by ",t(c)?new R(null,3,5,T,[lo,new r(null,1,[ho,c],null),b],null):b],null):null],null)} +function MA(a){var b=Mx(1,ig.h(iy)),c=Kx(1);lx(function(c){return function(){var d=function(){return function(a){return function(){function b(b){for(;;){a:try{for(;;){var c=a(b);if(!N(c,Z)){var d=c;break a}}}catch(x){if(x instanceof Object)b[5]=x,Cx(b),d=Z;else throw x;}if(!N(d,Z))return d}}function c(){var a=[null,null,null,null,null,null,null,null,null,null,null,null];a[0]=d;a[1]=1;return a}var d=null;d=function(a){switch(arguments.length){case 0:return c.call(this);case 1:return b.call(this,a)}throw Error("Invalid arity: "+ +(arguments.length-1));};d.B=c;d.h=b;return d}()}(function(){return function(c){var d=c[1];if(7===d)return c[7]=c[2],Ax(c,12,b,!1);if(1===d)return c[2]=null,c[1]=2,Z;if(4===d)return c[8]=c[2],Ax(c,5,b,!0);if(6===d)return d=Jx(3E3),Ux(c,8,new R(null,2,5,T,[a,d],null));if(3===d)return Bx(c,c[2]);if(12===d)return c[9]=c[2],c[2]=null,c[1]=2,Z;if(2===d)return zx(c,4,a);if(11===d)return c[2]=c[2],c[1]=7,Z;if(9===d)return c[2]=null,c[1]=6,Z;if(5===d)return c[10]=c[2],c[2]=null,c[1]=6,Z;if(10===d)return c[2]= +null,c[1]=11,Z;if(8===d){var e=c[2];d=J(e,0,null);e=J(e,1,null);e=G.c(e,a);c[11]=d;c[1]=e?9:10;return Z}return null}}(c),c)}(),f=function(){var a=d.B?d.B():d.call(null);a[6]=c;return a}();return yx(f)}}(c));return b} +function NA(a,b){var c=dg.h(b),d=Kx(1);lx(function(b,c){return function(){var d=function(){return 
function(a){return function(){function b(b){for(;;){a:try{for(;;){var c=a(b);if(!N(c,Z)){var d=c;break a}}}catch(F){if(F instanceof Object)b[5]=F,Cx(b),d=Z;else throw F;}if(!N(d,Z))return d}}function c(){var a=[null,null,null,null,null,null,null,null,null,null,null,null,null];a[0]=d;a[1]=1;return a}var d=null;d=function(a){switch(arguments.length){case 0:return c.call(this);case 1:return b.call(this, +a)}throw Error("Invalid arity: "+(arguments.length-1));};d.B=c;d.h=b;return d}()}(function(b,c){return function(d){var e=d[1];if(7===e){var f=d[7],h=wb(null==f);d[8]=d[2];d[1]=h?8:9;return Z}if(20===e)return f=d[7],d[1]=t(q===f.Fe)?23:24,Z;if(27===e)return d[2]=!1,d[1]=28,Z;if(1===e)return d[2]=null,d[1]=2,Z;if(24===e)return f=d[7],d[1]=t(!f.Tc)?26:27,Z;if(4===e){f=d[7];var k=d[9];h=d[2];var l=J(h,0,null),m=J(h,1,null);d[10]=m;d[7]=l;d[9]=h;d[1]=t(null==l)?5:6;return Z}return 15===e?(d[2]=!1,d[1]= +16,Z):21===e?(f=d[7],h=Ab(Yx,f),d[2]=h,d[1]=22,Z):31===e?(d[11]=d[2],d[2]=null,d[1]=2,Z):13===e?(d[2]=d[2],d[1]=10,Z):22===e?(d[1]=t(d[2])?29:30,Z):29===e?(f=d[7],h=B(a),h=Zx(f,h),h=gg.l(c,wo,h),d[2]=h,d[1]=31,Z):6===e?(d[2]=null,d[1]=7,Z):28===e?(d[2]=d[2],d[1]=25,Z):25===e?(d[2]=d[2],d[1]=22,Z):17===e?(m=d[10],f=d[7],k=d[9],h=gg.c(a,function(){return function(a,b){return function(a){return Xx(b,a)}}(k,f,m,m,f,k,e,b,c)}()),d[2]=h,d[1]=19,Z):3===e?Bx(d,d[2]):12===e?(f=d[7],d[1]=t(!f.Tc)?14:15,Z): +2===e?(h=B(c),h=E(h),Ux(d,4,h)):23===e?(d[2]=!0,d[1]=25,Z):19===e?(f=d[7],h=wb(null==f),d[12]=d[2],d[1]=h?20:21,Z):11===e?(d[2]=!0,d[1]=13,Z):9===e?(f=d[7],h=Ab(Wx,f),d[2]=h,d[1]=10,Z):5===e?(m=d[10],h=gg.l(c,re,m),d[2]=h,d[1]=7,Z):14===e?(f=d[7],h=Ab(Wx,f),d[2]=h,d[1]=16,Z):26===e?(f=d[7],h=Ab(Yx,f),d[2]=h,d[1]=28,Z):16===e?(d[2]=d[2],d[1]=13,Z):30===e?(d[2]=null,d[1]=31,Z):10===e?(d[1]=t(d[2])?17:18,Z):18===e?(d[2]=null,d[1]=19,Z):8===e?(f=d[7],d[1]=t(q===f.sb)?11:12,Z):null}}(b,c),b,c)}(),e=function(){var a= +d.B?d.B():d.call(null);a[6]=b;return a}();return yx(e)}}(d,c));return 
d} +function OA(a,b,c){c=Ty(c,!0);var d=Sy(b,JA),e=Sy(b,KA),f=yp(function(){return function(){return Hm.h(B(a))}}(c,d,e)),h=yp(function(){return function(){return el.h(B(a))}}(c,d,e,f)),k=yp(function(a,b,c,d,e){return function(){var a=B(d);return t(a)?a:B(e)}}(c,d,e,f,h)),l=yp(function(b,c,d,e,f,h){return function(){var b=Gk.h(B(a));b=t(b)?b:wb(B(h));return t(b)?"hud":null}}(c,d,e,f,h,k)),p=yp(function(){return function(){return["asciinema-theme-",v.h(gm.h(B(a)))].join("")}}(c,d,e,f,h,k,l)),m=yp(function(){return function(){var b= +fl.h(B(a));return t(b)?b:80}}(c,d,e,f,h,k,l,p)),u=yp(function(){return function(){var b=no.h(B(a));return t(b)?b:24}}(c,d,e,f,h,k,l,p,m)),w=yp(function(){return function(){return wk.h(B(a))}}(c,d,e,f,h,k,l,p,m,u)),x=yp(function(){return function(){return V.h(B(a))}}(c,d,e,f,h,k,l,p,m,u,w)),C=yp(function(){return function(){return ml.h(B(a))}}(c,d,e,f,h,k,l,p,m,u,w,x)),F=yp(function(){return function(){return jn.h(B(a))}}(c,d,e,f,h,k,l,p,m,u,w,x,C)),I=yp(function(){return function(){return Uj.h(B(a))}}(c, +d,e,f,h,k,l,p,m,u,w,x,C,F)),M=yp(function(){return function(){return wl.h(B(a))}}(c,d,e,f,h,k,l,p,m,u,w,x,C,F,I)),S=B(a),X=null!=S&&(S.m&64||q===S.G)?P(U,S):S,Ga=D.c(X,ki),db=D.c(X,li),Q=D.c(X,mi),xb=D.c(X,ni);return function(a,c,d,e,f,h,k,l,m,p,u,w,x,C,F,I,M,S,Q,X,Ga,db){return function(){return new R(null,3,5,T,[Cn,new r(null,5,[Jj,-1,Zj,c,Rn,d,Vm,a,vn,B(k)],null),new R(null,7,5,T,[Sm,new r(null,1,[vn,B(l)],null),new R(null,6,5,T,[rA,m,p,u,w,x],null),new R(null,5,5,T,[DA,C,F,I,b],null),t(t(Q)?Q: +X)?new R(null,5,5,T,[LA,Q,X,Ga,db],null):null,t(B(h))?null:new R(null,2,5,T,[EA,b],null),t(B(e))?new R(null,1,5,T,[FA],null):null],null)],null)}}(c,d,e,f,h,k,l,p,m,u,w,x,C,F,I,M,S,X,Ga,db,Q,xb)} +function PA(a){var b=Kx(null),c=Kx(new dx(bx(1),1));return function(b,c){return function(){return Pp(new r(null,4,[ln,"asciinema-player",Dm,function(b,c){return function(){return OA(a,b,c)}}(b,c),$k,function(b,c){return function(){var 
d=ty(Gl.h(B(a))),e=MA(c);Tx(e,b);return NA(a,Je([b,d]))}}(b,c),Wm,function(){return function(){return uy(Gl.h(B(a)))}}(b,c)],null))}}(b,c)};function QA(a,b){var c=null!=b&&(b.m&64||q===b.G)?P(U,b):b,d=D.c(c,Ak),e=D.c(c,Gl);d=a.h?a.h(d):a.call(null,d);zy(e,d);return K.l(c,Ak,d)}$x.prototype.sb=q;$x.prototype.qb=function(a,b){var c=null!=b&&(b.m&64||q===b.G)?P(U,b):b,d=D.c(c,Uj),e=D.c(c,wl),f=D.c(c,Gl);t(e)&&yy(f,Nu(d+5,e));return c};ay.prototype.sb=q;ay.prototype.qb=function(a,b){var c=null!=b&&(b.m&64||q===b.G)?P(U,b):b,d=D.c(c,Uj),e=D.c(c,wl),f=D.c(c,Gl);t(e)&&yy(f,Nu(d+-5,e));return c};by.prototype.sb=q; +by.prototype.qb=function(a,b){var c=null!=b&&(b.m&64||q===b.G)?P(U,b):b,d=D.c(c,wl),e=D.c(c,Gl);t(d)&&(d*=nn.h(this),yy(e,d));return c};dy.prototype.sb=q;dy.prototype.qb=function(a,b){return QA(function(){return function(a){return a/2}}(this),b)};ey.prototype.sb=q;ey.prototype.qb=function(a,b){return QA(function(){return function(a){return 2*a}}(this),b)};fy.prototype.sb=q;fy.prototype.qb=function(a,b){xy(Gl.h(b));return b};gy.prototype.sb=q;gy.prototype.qb=function(a,b){return K.l(b,ml,so.h(this))}; +hy.prototype.sb=q;hy.prototype.qb=function(a,b){return K.l(b,Gk,so.h(this))};jy.prototype.sb=q;jy.prototype.qb=function(a,b){var c=null!=a&&(a.m&64||q===a.G)?P(U,a):a;D.c(c,fl);D.c(c,no);D.c(c,wl);c=null!=b&&(b.m&64||q===b.G)?P(U,b):b;var d=D.c(c,fl),e=D.c(c,no),f=null!=this&&(this.m&64||q===this.G)?P(U,this):this,h=D.c(f,fl),k=D.c(f,no);f=D.c(f,wl);return K.A(c,fl,t(d)?d:h,be([no,t(e)?e:k,wl,f]))};ky.prototype.sb=q;ky.prototype.qb=function(a,b){return K.l(b,Hm,Hm.h(this))};oy.prototype.sb=q; +oy.prototype.qb=function(a,b){var c=null!=b&&(b.m&64||q===b.G)?P(U,b):b,d=D.c(c,oi);t(d)&&(ap(bp),d.B?d.B():d.call(null));return c};ry.prototype.sb=q;ry.prototype.qb=function(a,b){return K.l(b,Uj,Zk.h(this))};function RA(){return ig.l(function(a,b){return new R(null,2,5,T,[a,new gy(b,null,null,null)],null)},rg(function(a){return a+.5},.5),og(new 
R(null,2,5,T,[!1,!0],null)))}function SA(a){var b=Dy(RA());return K.l(K.l(a,ml,!0),Ol,b)} +function TA(a){a=null!=a&&(a.m&64||q===a.G)?P(U,a):a;var b=D.c(a,Ol);Tw(b);return K.l(K.l(a,ml,!0),Ol,null)}function UA(a){a=null!=a&&(a.m&64||q===a.G)?P(U,a):a;a=D.c(a,Ol);return t(a)?Je([a]):vi}my.prototype.sb=q; +my.prototype.qb=function(a,b){var c=null!=a&&(a.m&64||q===a.G)?P(U,a):a;D.c(c,jn);var d=null!=b&&(b.m&64||q===b.G)?P(U,b):b,e=D.c(d,jn);c=D.c(d,pi);var f=D.c(d,qi),h=null!=this&&(this.m&64||q===this.G)?P(U,this):this;h=D.c(h,jn);if(G.c(e,h))return d;d=K.A(d,jn,h,be([el,!0]));if(t(h))return t(c)&&(c.B?c.B():c.call(null)),SA(d);t(f)&&(f.B?f.B():f.call(null));return TA(d)};my.prototype.Fe=q;my.prototype.de=function(a,b){return UA(b)};py.prototype.sb=q; +py.prototype.qb=function(a,b){var c=K.l(b,V,V.h(this));c=null!=c&&(c.m&64||q===c.G)?P(U,c):c;var d=D.c(c,Ol);return t(d)?SA(TA(c)):c};py.prototype.Fe=q;py.prototype.de=function(a,b){return UA(b)};function VA(a){return t(a)?(a=ig.c(parseFloat,Fo(""+v.h(a),/:/)),a=ig.l(Ye,cf(a),rg(function(){return function(a){return 60*a}}(a),1)),P(Xe,a)):null} +function WA(a,b,c){t(a)?"string"===typeof a?t(0===a.indexOf("data:application/json;base64,"))?(b=a.substring(29).replace(RegExp("\\s","g"),""),b=JSON.parse(atob(b)),b=fj(b),b=new r(null,1,[V,new r(null,1,[il,b],null)],null)):t(0===a.indexOf("data:text/plain,"))?(a=a.substring(16),b=Ju(Ot(t(b)?b:80,t(c)?c:24),a),b=new r(null,1,[V,b],null)):b=t(0===a.indexOf("npt:"))?new r(null,1,[Zk,VA(a.substring(4))],null):null:b=new r(null,1,[V,new r(null,1,[il,a],null)],null):b=null;return b} +var XA=new r(null,2,[pl,new r(null,1,[On,!1],null),il,he],null); +function YA(a,b){var c=null!=b&&(b.m&64||q===b.G)?P(U,b):b,d=D.c(c,no),e=D.l(c,wk,"small"),f=D.l(c,Ak,1),h=D.c(c,Hk),k=D.c(c,fl),l=D.c(c,rl),p=D.l(c,cm,!1),m=D.l(c,gm,"asciinema"),u=D.c(c,qm),w=D.c(c,Bm),x=D.l(c,vm,!1),C=D.l(c,Em,!1),F=function(){var a=VA(h);return t(a)?a:0}();w=WA(w,k,d);var 
I=null!=w&&(w.m&64||q===w.G)?P(U,w):w;w=D.c(I,V);I=D.c(I,Zk);var M=t(I)?I:wb(w)&&0`_ course from the Computer Science +and Engineering Department, the Faculty of Automatic Control and +Computers, University POLITEHNICA of Bucharest. + +You can get the latest version at http://github.com/linux-kernel-labs. + +To get started build the documentation from the sources after +installing docker-compose on you host: + +.. code-block:: c + + cd tools/labs && make docker-docs + +then point your browser at **Documentation/output/labs/index.html**. + +Alternatively, you can build directly on the host (see +tools/labs/docs/Dockerfile for dependencies): + +.. code-block:: c + + cd tools/labs && make docs + +.. toctree:: + :caption: Lectures + + lectures/so2.cs.pub.ro.rst + lectures/intro.rst + lectures/syscalls.rst + lectures/interrupts.rst + lectures/smp.rst + lectures/debugging.rst + +.. toctree:: + :caption: Labs + + labs/infrastructure.rst + labs/introduction.rst + labs/kernel_modules.rst + labs/kernel_api.rst + labs/device_drivers.rst + labs/interrupts.rst + labs/deferred_work.rst + labs/block_device_drivers.rst + labs/filesystems_part1.rst + labs/filesystems_part2.rst + labs/networking.rst + labs/memory_mapping.rst + labs/device_model.rst + +.. toctree:: + :caption: Useful info + + info/vm.rst + info/contributing.rst + diff --git a/Documentation/teaching/info/contributing.rst b/Documentation/teaching/info/contributing.rst new file mode 100644 index 00000000000000..21995208cfc704 --- /dev/null +++ b/Documentation/teaching/info/contributing.rst @@ -0,0 +1,206 @@ +================================= +Contributing to linux-kernel-labs +================================= + +``linux-kernel-labs`` is an open platform. +You can help it get better by contributing to the documentation, exercises or +the infrastructure. +All contributions are welcome, no matter if they are just fixes for typos or +new sections in the documentation. 
+ +All information required for making a contribution can be found in the +`linux-kernel-labs Linux repo <https://github.com/linux-kernel-labs/linux>`_. +In order to change anything, you need to create a Pull Request (``PR``) +from your own fork to this repository. +The PR will be reviewed by the members of the team and will be merged once +any potential issue is fixed. + +******************** +Repository structure +******************** + +The `linux-kernel-labs repo <https://github.com/linux-kernel-labs/linux>`_ is +a fork of the Linux kernel repo, with the following additions: + + * ``/tools/labs``: contains the labs and the :ref:`virtual machine (VM) infrastructure <vm_link>` + + * ``tools/labs/templates``: contains the skeleton sources + * ``tools/labs/qemu``: contains the qemu VM configuration + + * ``/Documentation/teaching``: contains the sources used to generate this + documentation + +************************** +Building the documentation +************************** + +To build the documentation, navigate to ``tools/labs`` and run the following +command: + +.. code-block:: bash + + make docs + +.. note:: + The command should install all the required packages. + In some cases, installing the packages or building the documentation might + fail, because of broken dependency versions. + + Instead of struggling to fix the dependencies, the simplest way to build + the documentation is to use `Docker <https://www.docker.com/>`_. + First, install ``docker`` and ``docker-compose`` on your host, and then run: + + .. code-block:: bash + + make docker-docs + + The first run might take some time, but subsequent builds will be faster. + +*********************** +Creating a contribution +*********************** + +Forking the repository +====================== + +1. If you haven't done it already, clone the + `linux-kernel-labs repo <https://github.com/linux-kernel-labs/linux>`_ + repository locally: + + .. code-block:: bash + + $ mkdir -p ~/src + $ git clone git@github.com:linux-kernel-labs/linux.git ~/src/linux + +2.
Go to https://github.com/linux-kernel-labs/linux, make sure you are logged + in and click ``Fork`` in the top right of the page. + +3. Add the forked repo as a new remote to the local repo: + + .. code-block:: bash + + $ git remote add my_fork git@github.com:<your_username>/linux.git + +Now, you can push to your fork by using ``my_fork`` instead of ``origin`` +(e.g. ``git push my_fork master``). + +Creating a pull request +======================= + +.. warning:: + + Pull requests must be created from their own branches, which are started from + ``master``. + +1. Go to the master branch and make sure you have no local changes: + + .. code-block:: bash + + student@eg106:~/src/linux$ git checkout master + student@eg106:~/src/linux$ git status + On branch master + Your branch is up-to-date with 'origin/master'. + nothing to commit, working directory clean + + +2. Make sure the local master branch is up-to-date with linux-kernel-labs: + + .. code-block:: bash + + student@eg106:~/src/linux$ git pull origin master + + .. note:: + + You can also push the latest master to your forked repo: + + .. code-block:: bash + + student@eg106:~/src/linux$ git push my_fork master + +3. Create a new branch for your change: + + .. code-block:: bash + + student@eg106:~/src/linux$ git checkout -b <your_branch_name> + +4. Make some changes and commit them. In this example, we are going to change + ``Documentation/teaching/index.rst``: + + .. code-block:: bash + + student@eg106:~/src/linux$ vim Documentation/teaching/index.rst + student@eg106:~/src/linux$ git add Documentation/teaching/index.rst + student@eg106:~/src/linux$ git commit -m "<commit message>" + + .. warning:: + + The commit message must include a relevant description of your change + and the location of the changed component. + + Examples: + + * ``documentation: index: Fix typo in the first section`` + * ``labs: block_devices: Change printk log level`` + +5. Push the local branch to your forked repository: + + ..
code-block:: bash + + student@eg106:~/src/linux$ git push my_fork <your_branch_name> + +6. Open the Pull Request + + * Go to https://github.com and open your forked repository page + * Click ``New pull request``. + * Make sure the base repository (left side) is ``linux-kernel-labs/linux`` and the + base is master. + * Make sure the head repository (right side) is your forked repo and the + compare branch is your pushed branch. + * Click ``Create pull request``. + +Making changes to a Pull Request +================================ + +After receiving feedback for your changes, you might need to update the Pull +Request. +Your goal is to do a new push on the same branch. For this, follow the next steps: + +1. Make sure your branch is still up to date with the ``linux-kernel-labs`` repo + ``master`` branch. + + .. code-block:: bash + + student@eg106:~/src/linux$ git fetch origin master + student@eg106:~/src/linux$ git rebase FETCH_HEAD + + .. note:: + + If you are getting conflicts, it means that someone else modified the same + files/lines as you and already merged the changes since you opened the + Pull Request. + + In this case, you will need to fix the conflicts by editing the + conflicting files manually (run ``git status`` to see these files). + After fixing the conflicts, add them using ``git add`` and then run + ``git rebase --continue``. + + +2. Apply the changes to your local files. +3. Commit the changes. We want all the changes to be in the same commit, so + we will amend the changes to the initial commit. + + .. code-block:: bash + + student@eg106:~/src/linux$ git add Documentation/teaching/index.rst + student@eg106:~/src/linux$ git commit --amend + +4. Force-push the updated commit: + + .. code-block:: bash + + student@eg106:~/src/linux$ git push my_fork <your_branch_name> -f + + After this step, the Pull Request is updated. It is now up to the + linux-kernel-labs team to review the pull request and integrate your + contributions in the main project.
+ diff --git a/Documentation/teaching/info/vm.rst b/Documentation/teaching/info/vm.rst new file mode 100644 index 00000000000000..1742d420b0d1c0 --- /dev/null +++ b/Documentation/teaching/info/vm.rst @@ -0,0 +1,161 @@ +.. _vm_link: + +===================== +Virtual Machine Setup +===================== + +Exercises are designed to run on a qemu-based virtual machine. In +order to run the virtual machine you will need the following packages: + +* build-essential +* qemu-system-x86 +* qemu-system-arm +* kvm +* python3 + +The virtual machine setup uses prebuilt Yocto images that it downloads +from downloads.yoctoproject.org and a kernel image that it builds +itself. The following images are supported: + +* core-image-minimal-qemu +* core-image-minimal-dev-qemu +* core-image-sato-dev-qemu +* core-image-sato-qemu +* core-image-sato-sdk-qemu + +and can be selected from tools/labs/qemu/Makefile. + + +Starting the VM +--------------- + +The virtual machine scripts are available in tools/labs/qemu and you +can start the virtual machine by using the **boot** make target in +tools/labs: + +.. code-block:: shell + + ~/src/linux/tools/labs$ make boot + ARCH=x86 qemu/qemu.sh -kernel zImage.x86 -device virtio-serial \ + -chardev pty,id=virtiocon0 -device virtconsole,chardev=virtiocon0 \ + -net nic,model=virtio,vlan=0 -net tap,ifname=tap0,vlan=0,script=no,downscript=no\ + -drive file=rootfs.img,if=virtio,format=raw --append "root=/dev/vda console=hvc0" \ + --display none -s + char device redirected to /dev/pts/19 (label virtiocon0) + + +.. note:: To show the qemu console use "QEMU_DISPLAY=sdl make + boot". This will show the VGA output and will also give + access to the standard keyboard. + +.. _vm_interaction_link: + +Connecting to the VM +-------------------- + +Once the machine is booted you can connect to it on the serial port.
A +link named *serial.pts* is created to the right emulated serial port +and you can use **minicom** or **picocom** to connect to the virtual +machine from the host: + +.. code-block:: shell + + $ minicom -D serial.pts + + Poky (Yocto Project Reference Distro) 2.3 qemux86 /dev/hvc0 + + qemux86 login: root + root@qemux86:~# + +Networking is also set up and you can use ssh to connect to the virtual +machine after finding out the allocated IP address: + +.. code-block:: shell + + $ minicom -D serial.pts + + Poky (Yocto Project Reference Distro) 2.3 qemux86 /dev/hvc0 + + qemux86 login: root + root@qemux86:~# ifconfig + eth0 Link encap:Ethernet HWaddr 52:54:00:12:34:56 + inet addr:172.20.0.6 Bcast:172.20.0.255 Mask:255.255.255.0 + UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 + RX packets:41 errors:0 dropped:0 overruns:0 frame:0 + TX packets:6 errors:0 dropped:0 overruns:0 carrier:0 + collisions:0 txqueuelen:1000 + RX bytes:7578 (7.4 KiB) TX bytes:1296 (1.2 KiB) + + lo Link encap:Local Loopback + inet addr:127.0.0.1 Mask:255.0.0.0 + inet6 addr: ::1%134535719/128 Scope:Host + UP LOOPBACK RUNNING MTU:65536 Metric:1 + RX packets:0 errors:0 dropped:0 overruns:0 frame:0 + TX packets:0 errors:0 dropped:0 overruns:0 carrier:0 + collisions:0 txqueuelen:1000 + RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) + + $ ssh root@172.20.0.6 + The authenticity of host '172.20.0.6 (172.20.0.6)' can't be established. + RSA key fingerprint is SHA256:CW1opJUHi4LDt1lnKjBVv12kXZ4s+8rreMBm5Jsdm00. + Are you sure you want to continue connecting (yes/no)? yes + Warning: Permanently added '172.20.0.6' (RSA) to the list of known hosts. + root@qemux86:~# + +.. attention:: The Yocto core-image-minimal-qemu does not include an + SSH server, so you will not be able to connect via ssh if + you are using this image.
+ + +Connecting a debugger to the VM kernel +-------------------------------------- + +You can connect gdb to the running VM kernel and inspect the state of +the kernel by running the *gdb* target from tools/labs: + +.. code-block :: shell + + $ make gdb + ln -fs /home/tavi/src/linux/vmlinux vmlinux + gdb -ex "target remote localhost:1234" vmlinux + GNU gdb (Ubuntu 7.11.1-0ubuntu1~16.04) 7.11.1 + Copyright (C) 2016 Free Software Foundation, Inc. + License GPLv3+: GNU GPL version 3 or later + This is free software: you are free to change and redistribute it. + There is NO WARRANTY, to the extent permitted by law. Type "show copying" + and "show warranty" for details. + This GDB was configured as "x86_64-linux-gnu". + Type "show configuration" for configuration details. + For bug reporting instructions, please see: + . + Find the GDB manual and other documentation resources online at: + . + For help, type "help". + Type "apropos word" to search for commands related to "word"... + Reading symbols from vmlinux...done. + Remote debugging using localhost:1234 + 0xc13cf2f2 in native_safe_halt () at ./arch/x86/include/asm/irqflags.h:53 + 53asm volatile("sti; hlt": : :"memory"); + (gdb) bt + #0 0xc13cf2f2 in native_safe_halt () at ./arch/x86/include/asm/irqflags.h:53 + #1 arch_safe_halt () at ./arch/x86/include/asm/irqflags.h:95 + #2 default_idle () at arch/x86/kernel/process.c:341 + #3 0xc101f136 in arch_cpu_idle () at arch/x86/kernel/process.c:332 + #4 0xc106a6dd in cpuidle_idle_call () at kernel/sched/idle.c:156 + #5 do_idle () at kernel/sched/idle.c:245 + #6 0xc106a8c5 in cpu_startup_entry (state=) + at kernel/sched/idle.c:350 + #7 0xc13cb14a in rest_init () at init/main.c:415 + #8 0xc1507a7a in start_kernel () at init/main.c:679 + #9 0xc10001da in startup_32_smp () at arch/x86/kernel/head_32.S:368 + #10 0x00000000 in ?? () + (gdb) + +Rebuild the kernel image +------------------------ + +The kernel image is built the first time the VM is started. 
To rebuild +the kernel, remove the **zImage** file and run the zImage target (or +start the VM again). + +.. add info about how to update the image diff --git a/Documentation/teaching/labs/block_device_drivers.rst b/Documentation/teaching/labs/block_device_drivers.rst new file mode 100644 index 00000000000000..4c27a3635b14e7 --- /dev/null +++ b/Documentation/teaching/labs/block_device_drivers.rst @@ -0,0 +1,1151 @@ +==================== +Block Device Drivers +==================== + +Lab objectives +============== + + * acquiring knowledge about the behavior of the I/O subsystem on Linux + * hands-on activities in structures and functions of block devices + * acquiring basic skills for utilizing the API for block devices, by solving + exercises + +Overview +======== + +Block devices are characterized by random access to data organized in fixed-size +blocks. Examples of such devices are hard drives, CD-ROM drives, RAM disks, etc. +The speed of block devices is generally much higher than the speed of character +devices, and their performance is also important. This is why the Linux kernel +handles these two types of devices differently (it uses a specialized API). + +Working with block devices is therefore more complicated than working with +character devices. Character devices have a single current position, while block +devices must be able to move to any position in the device to provide random +access to data. To simplify work with block devices, the Linux kernel provides +an entire subsystem called the block I/O (or block layer) subsystem. + +From the kernel perspective, the smallest logical unit of addressing is the +block. Although the physical device can be addressed at sector level, the kernel +performs all disk operations using blocks. Since the smallest unit of physical +addressing is the sector, the size of the block must be a multiple of the size +of the sector. Additionally, the block size must be a power of 2 and can not +exceed the size of a page.
The size of the block may vary depending on the file +system used, the most common values being 512 bytes, 1 kilobyte and 4 +kilobytes. + + +Register a block I/O device +=========================== + +To register a block I/O device, function :c:func:`register_blkdev` is used. +To deregister a block I/O device, function :c:func:`unregister_blkdev` is +used. + +Starting with version 4.9 of the Linux kernel, the call to +:c:func:`register_blkdev` is optional. The only operations performed by this +function are the dynamic allocation of a major (if the major argument is 0 when +calling the function) and creating an entry in :file:`/proc/devices`. In +future kernel versions it may be removed; however, most drivers still call it. + +Usually, the call to the register function is performed in the module +initialization function, and the call to the deregister function is performed in +the module exit function. A typical scenario is presented below: + + +.. code-block:: c + + #include <linux/fs.h> + + #define MY_BLOCK_MAJOR 240 + #define MY_BLKDEV_NAME "mybdev" + + static int my_block_init(void) + { + int status; + + status = register_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME); + if (status < 0) { + printk(KERN_ERR "unable to register mybdev block device\n"); + return -EBUSY; + } + //... + } + + static void my_block_exit(void) + { + //... + unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME); + } + + +Register a disk +=============== + +Although the :c:func:`register_blkdev` function obtains a major, it does not +provide a device (disk) to the system. For creating and using block devices +(disks), a specialized interface defined in :file:`linux/genhd.h` is used. + +The useful functions defined in :file:`linux/genhd.h` are to register/allocate +a disk, add it to the system, and de-register/unmount the disk. + +The :c:func:`alloc_disk` function is used to allocate a disk, and the +:c:func:`del_gendisk` function is used to deallocate it.
Adding the disk to the +system is done using the :c:func:`add_disk` function. + +The :c:func:`alloc_disk` and :c:func:`add_disk` functions are typically used in +the module initialization function, and the :c:func:`del_gendisk` function in +the module exit function. + +.. code-block:: c + + #include + #include + + #define MY_BLOCK_MINORS 1 + + static struct my_block_dev { + struct gendisk *gd; + //... + } dev; + + static int create_block_device(struct my_block_dev *dev) + { + dev->gd = alloc_disk(MY_BLOCK_MINORS); + //... + add_disk(dev->gd); + } + + static int my_block_init(void) + { + //... + create_block_device(&dev); + } + + static void delete_block_device(struct my_block_dev *dev) + { + if (dev->gd) + del_gendisk(dev->gd); + //... + } + + static void my_block_exit(void) + { + delete_block_device(&dev); + //... + } + +As with character devices, it is recommended to use :c:type:`my_block_dev` +structure to store important elements describing the block device. + +Note that immediately after calling the :c:func:`add_disk` function (actually +even during the call), the disk is active and its methods can be called at any +time. As a result, this function should not be called before the driver is fully +initialized and ready to respond to requests for the registered disk. + + +It can be noticed that the basic structure in working with block devices (disks) +is the :c:type:`struct gendisk` structure. + +After a call to :c:func:`del_gendisk`, the :c:type:`struct gendisk` structure +may continue to exist (and the device operations may still be called) if there +are still users (an open operation was called on the device but the associated +release operation has not been called). One solution is to keep the number of +users of the device and call the :c:func:`del_gendisk` function only when there +are no users left of the device. 
+ +:c:type:`struct gendisk` structure +================================== + +The :c:type:`struct gendisk` structure stores information about a disk. As +stated above, such a structure is obtained from the :c:func:`alloc_disk` call +and its fields must be filled before it is sent to the :c:func:`add_disk` +function. + +The :c:type:`struct gendisk` structure has the following important fields: + + * :c:member:`major`, :c:member:`first_minor`, :c:member:`minors`, describing + the identifiers used by the disk; a disk must have at least one minor; if + the disk allows the partitioning operation, a minor must be allocated for + each possible partition + * :c:member:`disk_name`, which represents the disk name as it appears in + :file:`/proc/partitions` and in sysfs (:file:`/sys/block`) + * :c:member:`fops`, representing operations associated with the disk + * :c:member:`queue`, which represents the queue of requests + * :c:member:`capacity`, which is the disk capacity in 512-byte sectors; + it is initialized using the :c:func:`set_capacity` function + * :c:member:`private_data`, which is a pointer to private data + +An example of filling a :c:type:`struct gendisk` structure is presented below: + +.. code-block:: c + + #include <linux/genhd.h> + #include <linux/fs.h> + #include <linux/blkdev.h> + + #define NR_SECTORS 1024 + + #define KERNEL_SECTOR_SIZE 512 + + static struct my_block_dev { + //... + spinlock_t lock; /* For mutual exclusion */ + struct request_queue *queue; /* The device request queue */ + struct gendisk *gd; /* The gendisk structure */ + //... + } dev; + + static int create_block_device(struct my_block_dev *dev) + { + ...
+ /* Initialize the gendisk structure */ + dev->gd = alloc_disk(MY_BLOCK_MINORS); + if (!dev->gd) { + printk (KERN_NOTICE "alloc_disk failure\n"); + return -ENOMEM; + } + + dev->gd->major = MY_BLOCK_MAJOR; + dev->gd->first_minor = 0; + dev->gd->fops = &my_block_ops; + dev->gd->queue = dev->queue; + dev->gd->private_data = dev; + snprintf (dev->gd->disk_name, 32, "myblock"); + set_capacity(dev->gd, NR_SECTORS); + + add_disk(dev->gd); + + return 0; + } + + static int my_block_init(void) + { + int status; + //... + status = create_block_device(&dev); + if (status < 0) + return status; + //... + } + + static void delete_block_device(struct my_block_dev *dev) + { + if (dev->gd) { + del_gendisk(dev->gd); + } + //... + } + + static void my_block_exit(void) + { + delete_block_device(&dev); + //... + } + +As stated before, the kernel considers a disk as a vector of 512 byte sectors. +In reality, the devices may have a different size of the sector. To work with +these devices, the kernel needs to be informed about the real size of a sector, +and for all operations the necessary conversions must be made. + +To inform the kernel about the device sector size, a parameter of the request +queue must be set just after the request queue is allocated, using the +:c:func:`blk_queue_logical_block_size` function. All requests generated by the +kernel will be multiple of this sector size and will be aligned accordingly. +However, communication between the device and the driver will still be performed +in sectors of 512 bytes in size, so conversion should be done each time (an +example of such conversion is when calling the :c:func:`set_capacity` function +in the code above). + +:c:type:`struct block_device_operations` structure +================================================== + +Just as for a character device, operations in :c:type:`struct file_operations` +should be completed, so for a block device, the operations in +:c:type:`struct block_device_operations` should be completed. 
The association +of operations is done through the :c:member:`fops` field in the +:c:type:`struct gendisk` +structure. + +Some of the fields of the :c:type:`struct block_device_operations` structure +are presented below: + +.. code-block:: c + + struct block_device_operations { + int (*open) (struct block_device *, fmode_t); + int (*release) (struct gendisk *, fmode_t); + int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, + unsigned long); + int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); + int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, + unsigned long); + int (*direct_access) (struct block_device *, sector_t, + void **, unsigned long *); + int (*media_changed) (struct gendisk *); + int (*revalidate_disk) (struct gendisk *); + int (*getgeo)(struct block_device *, struct hd_geometry *); + struct module *owner; + } + +:c:func:`open` and :c:func:`release` operations are called directly from user +space by utilities that may perform the following tasks: partitioning, file +system creation, file system verification. In a :c:func:`mount` operation, the +:c:func:`open` function is called directly from the kernel space, the file +descriptor being stored by the kernel. A driver for a block device can not +differentiate between :c:func:`open` calls performed from user space and kernel +space. + +An example of how to use these two functions is given below: + +.. code-block:: c + + #include + #include + + static struct my_block_dev { + //... + struct gendisk * gd; + //... + } dev; + + static int my_block_open(struct block_device *bdev, fmode_t mode) + { + //... + + return 0; + } + + static int my_block_release(struct gendisk *gd, fmode_t mode) + { + //... + + return 0; + } + + struct block_device_operations my_block_ops = { + .owner = THIS_MODULE, + .open = my_block_open, + .release = my_block_release + }; + + static int create_block_device(struct my_block_dev *dev) + { + //.... 
+ dev->gd->fops = &my_block_ops; + dev->gd->private_data = dev; + //... + } + +Please notice that there are no read or write operations. These operations are +performed by the :c:func:`request` function associated with the request queue +of the disk. + +Request queues +============== + +Drivers for block devices use queues to store the block requests I/O that will +be processed. A request queue is represented by the +:c:type:`struct request_queue` structure. The request queue is made up of a +double-linked list of requests and their associated control information. The +requests are added to the queue by higher-level kernel code (for example, file +systems). As long as the request queue is not empty, the queue's associated +driver will have to retrieve the first request from the queue and pass it to the +associated block device. Each item in the request queue is a request represented +by the :c:type:`struct request` structure. + +Request queues implement an interface that allows the use of multiple I/O +schedulers. A scheduler must sort the requests and present them to the driver +in order to maximize performance. The scheduler also deals with the combination +of adjacent requests (which refer to adjacent sectors of the disk). + +Create and delete a request queue +--------------------------------- + +A request queue is created with the :c:func:`blk_init_queue` function and is +deleted using the :c:func:`blk_cleanup_queue` function. + +An example of using these functions is as follows: + +.. code-block:: c + + #include + #include + #include + + static struct my_block_dev { + //... + struct request_queue *queue; + //... + } dev; + + static void my_block_request(struct request_queue *q); + //... 
+ + static int create_block_device(struct my_block_dev *dev) + { + /* Initialize the I/O queue */ + spin_lock_init(&dev->lock); + dev->queue = blk_init_queue(my_block_request, &dev->lock); + if (dev->queue == NULL) + goto out_err; + blk_queue_logical_block_size(dev->queue, KERNEL_SECTOR_SIZE); + dev->queue->queuedata = dev; + //... + + out_err: + return -ENOMEM; + } + + static int my_block_init(void) + { + int status; + //... + status = create_block_device(&dev); + if (status < 0) + return status; + //... + } + + static void delete_block_device(struct block_dev *dev) + { + //... + if (dev->queue) + blk_cleanup_queue(dev->queue); + } + + static void my_block_exit(void) + { + delete_block_device(&dev); + //... + } + +The :c:func:`blk_init_queue` function receives as first argument a pointer to +the function which processes the requests for the device (of type +:c:type:`request_fn_proc`). In the example above, the function is +:c:func:`my_block_request`. The lock parameter is a spinlock (initialized by the +driver) that the kernel holds during the :c:func:`request` function call to +ensure exclusive access to the queue. This spinlock can also be used in other +driver functions to protect access to shared data with the :c:func:`request` +function. + +As part of the request queue initialization, you can configure the +:c:member:`queuedata` field, which is equivalent to the :c:member:`private_data` +field in other structures. + +Useful functions for processing request queues +---------------------------------------------- + +The function of type :c:type:`request_fn_proc` is used to handle requests for +working with the block device. This function is the equivalent of read and +write functions encountered on character devices. The function receives the +request queue associated with the device as an argument and can use various +functions for processing the requests from the request queue. 
+ +The functions used to process the requests from the request queue are +described below: + + * :c:func:`blk_peek_request` - retrieves a reference to the first request + from the queue; the respective request must be started using + :c:func:`blk_start_request`; + * :c:func:`blk_start_request` - extracts the request from the queue and + starts it for processing; in general, the function receives as argument + a pointer to a request returned by :c:func:`blk_peek_request`; + * :c:func:`blk_fetch_request` - retrieves the first request from the queue + (using :c:func:`blk_peek_request`) and starts it (using + :c:func:`blk_start_request`); + * :c:func:`blk_requeue_request` - puts a started request back on the queue. + +Before calling any of the functions above, the spinlock associated to the queue +must be acquired. If they are called from the function of type +:c:type:`request_fn_proc`, then the spinlock is already held. + +Requests for block devices +========================== + +A request for a block device is described by the :c:type:`struct request` +structure. + +The fields of the :c:type:`struct request` structure include: + + * :c:member:`cmd_flags`: a series of flags including direction (reading or + writing); to find out the direction, the macrodefinition + :c:macro:`rq_data_dir` is used, which returns 0 for a read request and 1 + for a write request on the device; + * :c:member:`__sector`: the first sector of the transfer request; if the + device sector has a different size, the appropriate conversion should be + done. 
To access this field, use the :c:macro:`blk_rq_pos` macro; + * :c:member:`__data_len`: the total number of bytes to be transferred; to + access this field the :c:macro:`blk_rq_bytes` macro is used; + * generally, data from the current :c:type:`struct bio` will be + transferred; the data size is obtained using the + :c:macro:`blk_rq_cur_bytes` macro; + * :c:member:`bio`, a dynamic list of :c:type:`struct bio` structures that + is a set of buffers associated to the request; this field is accessed by + macrodefinition :c:macro:`rq_for_each_segment` if there are multiple + buffers, or by :c:macro:`bio_data` macrodefinition in case there is only + one associated buffer; + * :c:member:`bio_data`: the address of the buffer associated to the request + * about the :c:type:`struct bio` structure and its associated operations + will be discussed in the :ref:`bio_structure` section; + +Create a request +---------------- + +Read /write requests are created by code layers superior to the kernel I/O +subsystem. Typically, the subsystem that creates requests for block devices is +the file management subsystem. The I/O subsystem acts as an interface between +the file management subsystem and the block device driver. The main operations +under the responsibility of the I/O subsystem are adding requests to the queue +of the specific block device and sorting and merging requests according to +performance considerations. + +Finish a request +---------------- + +When the driver has finished transferring all the sectors of a request to /from +the device, it must inform the I/O subsystem by calling the +:c:func:`blk_end_request` function. If the lock associated to the request queue +is already acquired, the :c:func:`__blk_end_request` function can be used. + +If the driver wants to close the request even if it did not transfer all the +related sectors, it can call the :c:func:`blk_end_request_all` or +:c:func:`__blk_end_request_all` function. 
The :c:func:`__blk_end_request_all` +function is called if the lock associated to the request queue is already +acquired. + +Process a request +----------------- + +The central part of a block device driver is the :c:type:`request_fn_proc` +function type. In previous examples, the function that fulfilled this role was +:c:func:`my_block_request`. As stated in the +`Create and delete a request queue`_ section, this function is associated to the +driver by calling the :c:func:`blk_init_queue` function. + +This function is called when the kernel considers that the driver should process +I/O requests. The function must start processing the requests from the queue, +but it is not mandatory to finish them, as requests may be finished by other +parts of the driver. + +The :c:data:`lock` parameter, sent when creating a request queue, is a spinlock +that the kernel holds when executing the request method. For this reason, the +request function runs in an atomic context and must follow the rules for +atomic code (it must not call functions that can sleep, etc.). +This lock also ensures that no other requests for the device will be added to +the queue while the request function is running. + +Calling the function that processes the request queue is asynchronous relative +to the actions of any userspace process and no assumptions about the process +in which the respective function is running should be made. Also, it should not +be assumed that the buffer provided by a request is from kernel space or user +space, any operation that accesses the userspace being erroneous. + +Below is presented one of the simplest functions of type +:c:type:`request_fn_proc`: + +.. 
code-block:: c + + static void my_block_request(struct request_queue *q) + { + struct request *rq; + struct my_block_dev *dev = q->queuedata; + + while (1) { + rq = blk_fetch_request(q); + if (rq == NULL) + break; + + if (blk_rq_is_passthrough(rq)) { + printk (KERN_NOTICE "Skip non-fs request\n"); + __blk_end_request_all(rq, -EIO); + continue; + } + + /* do work */ + ... + + __blk_end_request_all(rq, 0); + } + } + +The :c:func:`my_block_request` function contains a ``while`` loop for +iterating through the request queue sent as argument. The operations performed +within this loop are: + + * Read the first request from the queue using :c:func:`blk_fetch_request`. + As described in the `Useful functions for processing request queues`_ section, + the :c:func:`blk_fetch_request` function retrieves the first item from the + request queue and starts the request. + * If the function returns NULL, it has reached the end of the request queue + (there is no remaining request to be processed) and exits + :c:func:`my_block_request`. + * A block device can receive calls which do not transfer data blocks (e.g. + low level operations on the disk, instructions referring to special ways of + accessing the device). Most drivers do not know how to handle these + requests and return an error. + * To return an error, the :c:func:`__blk_end_request_all` function is called, + -EIO being the second argument. + * The request is processed according to the needs of the associated device. + * The request ends. In this case, the :c:func:`__blk_end_request_all` function is + called in order to complete the request entirely. If all request sectors + have been processed, the :c:func:`__blk_end_request` function can be used instead. + +.. _bio_structure: + +:c:type:`struct bio` structure +============================== + +Each :c:type:`struct request` structure is an I/O block request, but may come +from combining several independent requests from a higher level. 
The sectors to be +transferred for a request can be scattered into the main memory but they always +correspond to a set of consecutive sectors on the device. The request is +represented as a series of segments, each corresponding to a buffer in memory. +The kernel can combine requests that refer to adjacent sectors but will not +combine write requests with read requests into a single +:c:type:`struct request` structure. + +A :c:type:`struct request` structure is implemented as a linked list of +:c:type:`struct bio` structures together with information that allows the +driver to retain its current position while processing the request. + +The :c:type:`struct bio` structure is a low-level description of a portion of +a block I/O request. + +.. code-block:: c + + struct bio { + //... + struct gendisk *bi_disk; + unsigned int bi_opf; /* bottom bits req flags, top bits REQ_OP. Use accessors. */ + //... + struct bio_vec *bi_io_vec; /* the actual vec list */ + //... + struct bvec_iter bi_iter; + /... + void *bi_private; + //... + }; + +In turn, the :c:type:`struct bio` structure contains a :c:member:`bi_io_vec` +vector of :c:type:`struct bio_vec` structures. It consists of the individual +pages in the physical memory to be transferred, the offset within the page and +the size of the buffer. To iterate through a :c:type:`struct bio` structure, +we need to iterate through the vector of :c:type:`struct bio_vec` and transfer +the data from every physical page. To simplify vector iteration, the +:c:type:`struct bvec_iter` structure is used. This structure maintains +information about how many buffers and sectors were consumed during the +iteration. The request type is encoded in the :c:member:`bi_opf` field; to +determine it, use the :c:func:`bio_data_dir` function. 
+ +Create a :c:type:`struct bio` structure +--------------------------------------- + +Two functions can be used to create a :c:type:`struct bio` structure: + + * :c:func:`bio_alloc`: allocates space for a new structure; the structure + must be initialized; + * :c:func:`bio_clone`: makes a copy of an existing :c:type:`struct bio` + structure; the newly obtained structure is initialized with the values of + the cloned structure fields; the buffers are shared with the + :c:type:`struct bio` structure that has been cloned so that access to the + buffers has to be done carefully to avoid access to the same memory area + from the two clones; + +Both functions return a new :c:type:`struct bio` structure. + +Submit a :c:type:`struct bio` structure +--------------------------------------- + +Usually, a :c:type:`struct bio` structure is created by the higher levels of +the kernel (usually the file system). A structure thus created is then +transmitted to the I/O subsystem that gathers more :c:type:`struct bio` +structures into a request. + +For submitting a :c:type:`struct bio` structure to the associated I/O device +driver, the :c:func:`submit_bio` function is used. The function receives as +argument an initialized :c:type:`struct bio` structure that will be added to +a request from the request queue of an I/O device. From that queue, it can be +processed by the I/O device driver using a specialized function. + + +.. _bio_completion: + +Wait for the completion of a :c:type:`struct bio` structure +----------------------------------------------------------- + +Submitting a :c:type:`struct bio` structure to a driver has the effect of +adding it to a request from the request queue from where it will be further +processed. Thus, when the :c:func:`submit_bio` function returns, it is not +guaranteed that the processing of the structure has finished. If you want to +wait for the processing of the request to be finished, use the +:c:func:`submit_bio_wait` function. 
+ +To be notified when the processing of a :c:type:`struct bio` structure ends +(when we do not use the :c:func:`submit_bio_wait` function), the +:c:member:`bi_end_io` field of the structure should be used. This field +specifies the function that will be called at the end of the +:c:type:`struct bio` structure processing. You can use the +:c:member:`bi_private` field of the structure to pass information to the +function. + +Initialize a :c:type:`struct bio` structure +------------------------------------------- + +Once a :c:type:`struct bio` structure has been allocated and before being +transmitted, it must be initialized. + +Initializing the structure involves filling in its important fields. As +mentioned above, the :c:member:`bi_end_io` field is used to specify the function +called when the processing of the structure is finished. The +:c:member:`bi_private` field is used to store useful data that can be accessed +in the function pointed to by :c:member:`bi_end_io`. + +The :c:member:`bi_opf` field specifies the type of operation. + +.. code-block:: c + + struct bio *bio = bio_alloc(GFP_NOIO, 1); + //... + bio->bi_disk = bdev->bd_disk; + bio->bi_iter.bi_sector = sector; + bio->bi_opf = REQ_OP_READ; + bio_add_page(bio, page, size, offset); + //... + +In the code snippet above we specified the following: the block device to which +the :c:type:`struct bio` structure is sent, the start sector, the operation +(:c:data:`REQ_OP_READ` or :c:data:`REQ_OP_WRITE`) and the content. The content of a +:c:type:`struct bio` structure is a buffer described by: a physical page, +the offset in the page and the size of the buffer. A page can be allocated using +the :c:func:`alloc_page` call. + +.. note:: The :c:data:`size` argument of the :c:func:`bio_add_page` call must be + a multiple of the device sector size. + +.. 
_bio_content: + +How to use the content of a :c:type:`struct bio` structure +---------------------------------------------------------- + +To use the content of a :c:type:`struct bio` structure, the structure's +support pages must be mapped to the kernel address space from where they can be +accessed. For mapping /unmapping, use the :c:macro:`kmap_atomic` and +the :c:macro:`kunmap_atomic` macros. + +A typical example of use is: + +.. code-block:: c + + static void my_block_transfer(struct my_block_dev *dev, size_t start, + size_t len, char *buffer, int dir); + + + static int my_xfer_bio(struct my_block_dev *dev, struct bio *bio) + { + struct bio_vec bvec; + struct bvec_iter i; + int dir = bio_data_dir(bio); + + /* Do each segment independently. */ + bio_for_each_segment(bvec, bio, i) { + sector_t sector = i.bi_sector; + char *buffer = kmap_atomic(bvec.bv_page); + unsigned long offset = bvec.bv_offset; + size_t len = bvec.bv_len; + + /* process mapped buffer */ + my_block_transfer(dev, sector, len, buffer + offset, dir); + + kunmap_atomic(buffer); + } + + return 0; + } + +As it can be seen from the example above, iterating through a +:c:type:`struct bio` requires iterating through all of its segments. A segment +(:c:type:`struct bio_vec`) is defined by the physical address page, the offset +in the page and its size. + +To simplify the processing of a :c:type:`struct bio`, use the +:c:macro:`bio_for_each_segment` macrodefinition. It will iterate through all +segments, and will also update global information stored in an iterator +(:c:type:`struct bvec_iter`) such as the current sector as well as other +internal information (segment vector index, number of bytes left to be +processed, etc.) . + +You can store information in the mapped buffer, or extract information. 
+ +In case request queues are used and you need to process the requests +at :c:type:`struct bio` level, use the :c:macro:`rq_for_each_segment` +macrodefinition instead of the :c:macro:`bio_for_each_segment` macrodefinition. +This macrodefinition iterates through each segment of each +:c:type:`struct bio` structure of a :c:type:`struct request` structure and +updates a :c:type:`struct req_iterator` structure. The +:c:type:`struct req_iterator` contains the current :c:type:`struct bio` +structure and the iterator that traverses its segments. + +A typical example of use is: + +.. code-block:: c + + struct bio_vec bvec; + struct req_iterator iter; + + rq_for_each_segment(bvec, req, iter) { + sector_t sector = iter.iter.bi_sector; + char *buffer = kmap_atomic(bvec.bv_page); + unsigned long offset = bvec.bv_offset; + size_t len = bvec.bv_len; + int dir = bio_data_dir(iter.bio); + + my_block_transfer(dev, sector, len, buffer + offset, dir); + + kunmap_atomic(buffer); + } + +Free a :c:type:`struct bio` structure +------------------------------------- + +Once a kernel subsystem has finished using a :c:type:`struct bio` structure, it has to +release its reference to it. This is done by calling the :c:func:`bio_put` function. + +Set up a request queue at :c:type:`struct bio` level +---------------------------------------------------- + +The function :c:func:`blk_init_queue` may specify a function to be used to +process requests sent to the driver. That function receives the request queue +as argument and carries out processing at +:c:type:`struct request` level. + +If, for flexibility reasons, you need to specify a function that carries +out processing at :c:type:`struct bio` structure level, the function +:c:func:`blk_queue_make_request` in conjunction with the +:c:func:`blk_alloc_queue` function should be used. + +Below is a typical example of registering a function that carries out +processing at :c:type:`struct bio` structure level: + +.. 
code-block:: c + + // the declaration of the function that carries out processing + // :c:type:`struct bio` structures + static void my_make_request(struct request_queue *q, struct bio *bio); + + + // ... + // queue creation + dev->queue = blk_alloc_queue (GFP_KERNEL); + if (dev->queue == NULL) { + printk(KERN_ERR "cannot allocate block device queue\n"); + return -ENOMEM; + } + // the registration of the function that carries out processing + // :c:type:`struct bio` structures + blk_queue_make_request(dev->queue, my_make_request); + dev->queue->queuedata = dev; + +Further reading +=============== + +* `Linux Device Drivers 3rd Edition, Chapter 16. Block Drivers `_ +* Linux Kernel Development, Second Edition – Chapter 13. The Block I/O Layer +* `A simple block driver `_ +* `The gendisk interface `_ +* `The bio structure `_ +* `Request queues `_ +* `Documentation/block/request.txt - Struct request documentation `_ +* `Documentation/block/biodoc.txt - Notes on the Generic Block Layer `_ +* `drivers/block/brd.c - RAM backed block disk driver `_ +* `I/O Schedulers `_ + + +Exercises +========= + +.. include:: exercises-summary.hrst +.. |LAB_NAME| replace:: block_device_drivers + +0. Intro +-------- + +Using |LXR|_ find the definitions of the following symbols in the Linux kernel: + + * :c:type:`struct bio` + * :c:type:`struct bio_vec` + * :c:macro:`bio_for_each_segment` + * :c:type:`struct gendisk` + * :c:type:`struct block_device_operations` + * :c:type:`struct request` + +1. Block device +--------------- + +Create a kernel module that allows you to register or deregister a block device. +Start from the files in the :file:`1-2-3-6-ram-disk/kernel` directory in the +lab skeleton. + +Follow the comments marked with **TODO 1** in the laboratory skeleton. Use the +existing macrodefinitions (:c:macro:`MY_BLOCK_MAJOR`, +:c:macro:`MY_BLKDEV_NAME`). Check the value returned by the register function, +and in case of error, return the error code. 
+ +Compile the module, copy it to the virtual machine and insert it into the +kernel. Verify that your device was successfully created inside the +:file:`/proc/devices`. +You will see a device with major 240. + +Unload the kernel module and check that the device was unregistered. + +.. hint:: Review the `Register a block I/O device`_ section. + +Change the :c:macro:`MY_BLOCK_MAJOR` value to 7. Compile the module, copy it to +the virtual machine, and insert it into the kernel. Notice that the insertion +fails because there is already another driver/device registered in the kernel +with the major 7. + +Restore the 240 value for the :c:macro:`MY_BLOCK_MAJOR` macro. + +2. Disk registration +-------------------- + +Modify the previous module to add a disk associated with the driver. Analyze the +macrodefinitions, :c:type:`my_block_dev` structure and existing functions from +the :file:`ram-disk.c` file. + +Follow the comments marked with **TODO 2**. Use the +:c:func:`create_block_device` and the :c:func:`delete_block_device` functions. + +.. hint:: Review the `Register a disk`_ and `Process a request`_ sections. + +Fill in the :c:func:`my_block_request` function to process the request queue +without actually processing your request: display the "request received" message +and the following information: start sector, total size, data size from the +current :c:type:`struct bio` structure, direction. To validate a request type, +use the :c:func:`blk_rq_is_passthrough` (the function returns 0 in the case in +which we are interested, i.e. when the request is generated by the file system). + +.. hint:: To retrieve the needed info, review the `Requests for block devices`_ + section. + +Use the :c:func:`__blk_end_request_all` function to finish processing the +request. + +Insert the module into the kernel and inspect the messages printed +by the module. When a device is added, a request is sent to the device. 
Check +the presence of :file:`/dev/myblock` and if it doesn't exist, create the device +using the command: + +.. code-block:: shell + + mknod /dev/myblock b 240 0 + +To generate writing requests, use the command: + +.. code-block:: shell + + echo "abc"> /dev/myblock + +Notice that a write request is preceded by a read request. The request +is done to read the block from the disk and "update" its content with the +data provided by the user, without overwriting the rest. After reading and +updating, writing takes place. + +3. RAM disk +----------- + +Modify the previous module to create a RAM disk: requests to the device will +result in reads/writes in a memory area. + +The memory area :c:data:`dev->data` is already allocated in the source code of +the module using :c:func:`vmalloc` and deallocated using :c:func:`vfree`. + +.. note:: Review the `Process a request`_ section. + +Follow the comments marked with **TODO 3** to complete the +:c:func:`my_block_transfer` function to write/read the request information +in/from the memory area. The function will be called for each request within +the queue processing function: :c:func:`my_block_request`. To write/read +to/from the memory area, use :c:func:`memcpy`. To determine the write/read +information, use the fields of the :c:type:`struct request` structure. + +.. hint:: To find out the size of the request data, use the + :c:macro:`blk_rq_cur_bytes` macro. Do not use the + :c:macro:`blk_rq_bytes` macro. + +.. hint:: To find out the buffer associated to the request, use + :c:data:`bio_data` (:c:data:`rq->bio`). + +.. hint:: A description of useful macros is in the `Requests for block devices`_ + section. + +.. hint:: You can find useful information in the + [block device driver example](https://github.com/martinezjavier/ldd3/blob/master/sbull/sbull.c) + from [Linux Device Driver](http://lwn.net/Kernel/LDD3/). + +For testing, use the test file :file:`user/ram-disk-test.c`. 
+The test program is compiled automatically at ``make build``, copied to the +virtual machine at ``make copy`` and can be run on the QEMU virtual machine +using the command: + +.. code-block:: shell + + ./ram-disk-test + +There is no need to insert the module into the kernel, it will be inserted by +the ``ram-disk-test`` command. + +Some tests may fail because of lack of synchronization between the transmitted +data (flush). + +4. Read data from the disk +-------------------------- + +The purpose of this exercise is to read data from the +:c:macro:`PHYSICAL_DISK_NAME` disk (:file:`/dev/vdb`) directly from the kernel. + +.. attention:: Before solving the exercise, we need to make sure the disk is + added to the virtual machine. + + Check the variable ``QEMU_OPTS`` from :file:`qemu/Makefile`. + There should already be two extra disks added using ``-drive ...``. + + If there are not, generate a file that we will use as + the disk image using the command: + :command:`dd if=/dev/zero of=qemu/mydisk.img bs=1024 count=1` + and add the following option: + :command:`-drive file=qemu/mydisk.img,if=virtio,format=raw` + to :file:`qemu/Makefile` (in the :c:data:`QEMU_OPTS` variable, + after the root disk). + +Follow the comments marked with **TODO 4** in the directory :file:`4-5-relay/` +and implement :c:func:`open_disk` and :c:func:`close_disk`. +Use the :c:func:`blkdev_get_by_path` and :c:func:`blkdev_put` functions. The +device must be opened in read-write mode exclusively +(:c:macro:`FMODE_READ` | :c:macro:`FMODE_WRITE` | :c:macro:`FMODE_EXCL`), and +as holder you must use the current module (:c:macro:`THIS_MODULE`). + +Implement the :c:func:`send_test_bio` function. You will have to create a new +:c:type:`struct bio` structure and fill it, submit it and wait for it. Read the +first sector of the disk. To wait, call the :c:func:`submit_bio_wait` function. + +.. hint:: The first sector of the disk is the sector with the index 0. 
+ This value must be used to initialize the field + :c:member:`bi_iter.bi_sector` of the :c:type:`struct bio`. + + For the read operation, use the :c:macro:`REQ_OP_READ` macro to + initialize the :c:member:`bi_opf` field of the :c:type:`struct bio`. + +After finishing the operation, display the first 3 bytes of data read by +:c:type:`struct bio` structure. Use the format ``"% 02x"`` for :c:func:`printk` +to display the data and the :c:macro:`kmap_atomic` and :c:macro:`kunmap_atomic` +macros respectively. + +.. hint:: As an argument for the :c:func:`kmap_atomic` function, just use the + page which is allocated above in the code, in the :c:data:`page` + variable. + +.. hint:: Review the sections :ref:`bio_content` and :ref:`bio_completion`. + +For testing, use the :file:`test-relay-disk` script, which is copied on the +virtual machine when running :command:`make copy`. If it is not copied, make +sure it is executable: + +.. code-block:: shell + + chmod +x test-relay-disk + +There is no need to load the module into the kernel, it will be loaded by +:command:`test-relay-disk`. + +Use the command below to run the script: + +.. code-block:: shell + + ./test-relay-disk + +The script writes "abc" at the beginning of the disk indicated by +:c:macro:`PHYSICAL_DISK_NAME`. After running, the module will display 61 62 63 +(the corresponding hexadecimal values of letters "a", "b" and "c"). + +5. Write data to the disk +------------------------- + +Follow the comments marked with **TODO 5** to write a message +(:c:macro:`BIO_WRITE_MESSAGE`) on the disk. + +The :c:func:`send_test_bio` function receives as argument the operation type +(read or write). Call in the :c:func:`relay_init` function the function for +reading and in the :c:func:`relay_exit` function the function for writing. We +recommend using the :c:macro:`REQ_OP_READ` and the :c:macro:`REQ_OP_WRITE` +macros. 
+ +Inside the :c:func:`send_test_bio` function, if the operation is write, fill in +the buffer associated to the :c:type:`struct bio` structure with the message +:c:macro:`BIO_WRITE_MESSAGE`. Use the :c:macro:`kmap_atomic` and the +:c:macro:`kunmap_atomic` macros to work with the buffer associated to the +:c:type:`struct bio` structure. + +.. hint:: You need to update the type of the operation associated to the + :c:type:`struct bio` structure by setting the :c:member:`bi_opf` field + accordingly. + +For testing, run the :file:`test-relay-disk` script using the command: + +.. code-block:: shell + + ./test-relay-disk + +The script will display the ``"read from /dev/sdb: 64 65 66"`` message at the +standard output. + +6. Processing requests from the request queue at :c:type:`struct bio` level +--------------------------------------------------------------------------- + +In the implementation from Exercise 3, we have only processed a +:c:type:`struct bio_vec` of the current :c:type:`struct bio` from the request. +We want to process all :c:type:`struct bio_vec` structures from all +:c:type:`struct bio` structures. +For this, we will iterate through all :c:type:`struct bio` requests and through +all :c:type:`struct bio_vec` structures (also called segments) of each +:c:type:`struct bio`. + +Add, within the ramdisk implementation (:file:`1-2-3-6-ram-disk/` directory), +support for processing the requests from the request queue at +:c:type:`struct bio` level. Follow the comments marked with **TODO 6**. + +Set the :c:macro:`USE_BIO_TRANSFER` macro to 1. + +Implement the :c:func:`my_xfer_request` function. Use the +:c:macro:`rq_for_each_segment` macro to iterate through the :c:type:`bio_vec` +structures of each :c:type:`struct bio` from the request. + +.. hint:: Review the indications and the code snippets from the + :ref:`bio_content` section. + +.. hint:: Use the :c:type:`struct bio` segment iterator to get the current + sector (:c:member:`iter.iter.bi_sector`). + +.. 
hint:: Use the request iterator to get the reference to the current + :c:type:`struct bio` (:c:member:`iter.bio`). + +.. hint:: Use the :c:macro:`bio_data_dir` macro to find the reading or writing + direction for a :c:type:`struct bio`. + +Use the :c:macro:`kmap_atomic` and the :c:macro:`kunmap_atomic` macros to map +the pages of each :c:type:`struct bio` structure and access its associated +buffers. For the actual transfer, call the :c:func:`my_block_transfer` function +implemented in the previous exercise. + +For testing, use the :file:`ram-disk-test.c` test file: + +.. code-block:: shell + + ./ram-disk-test + +There is no need to insert the module into the kernel, it will be inserted by +the :command:`ram-disk-test` executable. + +Some tests may crash because of lack of synchronization between the transmitted +data (flush). diff --git a/Documentation/teaching/labs/deferred_work.rst b/Documentation/teaching/labs/deferred_work.rst new file mode 100644 index 00000000000000..7acc5a576706ff --- /dev/null +++ b/Documentation/teaching/labs/deferred_work.rst @@ -0,0 +1,946 @@ +============= +Deferred work +============= + +Lab objectives +============== + +* Understanding deferred work (i.e. code scheduled to be executed at a + later time) +* Implementation of common tasks that use deferred work +* Understanding the peculiarities of synchronization for deferred work + +Keywords: softirq, tasklet, struct tasklet_struct, bottom-half +handlers, jiffies, HZ, timer, struct timer_list, spin_lock_bh, +spin_unlock_bh, workqueue, struct work_struct, kernel thread, events/x + +Background information +====================== + +Deferred work is a class of kernel facilities that allows one to +schedule code to be executed at a later time. This scheduled code can +run either in process context or in interrupt context depending +on the type of deferred work. 
Deferred work is used to complement the +interrupt handler functionality since interrupts have important +requirements and limitations: + +* The execution time of the interrupt handler must be as small as + possible +* In interrupt context we can not use blocking calls + +Using deferred work we can perform the minimum required work in the +interrupt handler and schedule an asynchronous action from the +interrupt handler to run at a later time and execute the rest of the +operations. + +Deferred work that runs in interrupt context is also known as +bottom-half, since its purpose is to execute the rest of the actions +from an interrupt handler (top-half). + +Timers are another type of deferred work that are used to schedule the +execution of future actions after a certain amount of time has passed. + +Kernel threads are not themselves deferred work, but can be used to +complement the deferred work mechanisms. In general, kernel threads +are used as "workers" to process events whose execution contains +blocking calls. + +There are three typical operations that are used with all types of +deferred work: + +1. **Initialization**. Each type is described by a structure whose + fields will have to be initialized. The handler to be scheduled is + also set at this time. +2. **Scheduling**. Schedules the execution of the handler as soon as + possible (or after expiry of a timeout). +3. **Masking** or **Canceling**. Disables the execution of the + handler. This action can be either synchronous (which guarantees + that the handler will not run after the completion of canceling) or + asynchronous. + +.. attention:: When doing deferred work cleanup, like freeing the + structures associated with the deferred work or + removing the module and thus the handler code from the + kernel, always use the synchronous type of canceling + the deferred work. + +The main types of deferred work are kernel threads and softirqs. 
Work +queues are implemented on top of kernel threads and tasklets and +timers on top of softirqs. Bottom-half handlers were the first +implementation of deferred work in Linux, but in the meantime they were +replaced by softirqs. That is why some of the functions presented +contain *bh* in their name. + +Softirqs +======== + +Softirqs can not be used by device drivers; they are reserved for +various kernel subsystems. Because of this there is a fixed number of +softirqs defined at compile time. For the current kernel version we +have the following types defined: + +.. code-block:: c + + enum { + HI_SOFTIRQ = 0, + TIMER_SOFTIRQ, + NET_TX_SOFTIRQ, + NET_RX_SOFTIRQ, + BLOCK_SOFTIRQ, + IRQ_POLL_SOFTIRQ, + TASKLET_SOFTIRQ, + SCHED_SOFTIRQ, + HRTIMER_SOFTIRQ, + RCU_SOFTIRQ, + NR_SOFTIRQS + }; + + +Each type has a specific purpose: + +* *HI_SOFTIRQ* and *TASKLET_SOFTIRQ* - running tasklets +* *TIMER_SOFTIRQ* - running timers +* *NET_TX_SOFTIRQ* and *NET_RX_SOFTIRQ* - used by the networking subsystem +* *BLOCK_SOFTIRQ* - used by the IO subsystem +* *IRQ_POLL_SOFTIRQ* - used by the IO subsystem to increase performance when the iopoll handler is invoked +* *SCHED_SOFTIRQ* - load balancing +* *HRTIMER_SOFTIRQ* - implementation of high precision timers +* *RCU_SOFTIRQ* - implementation of RCU type mechanisms [1]_ + +.. [1] RCU is a mechanism by which destructive operations + (e.g. deleting an element from a chained list) are done in two + steps: (1) removing references to deleted data and (2) freeing + the memory of the element. The second step is done only after + we are sure nobody uses the element anymore. The advantage of + this mechanism is that reading the data can be done without + synchronization. For more information see + Documentation/RCU/rcu.txt. + + +The highest priority is the *HI_SOFTIRQ* type softirqs, followed in +order by the other softirqs defined. *RCU_SOFTIRQ* has the lowest +priority. 
+ +Softirqs are running in interrupt context which means that they can +not call blocking functions. If the softirq handler requires calls to +such functions, work queues can be scheduled to execute these blocking +calls. + +Tasklets +-------- + +A tasklet is a special form of deferred work that runs in interrupt +context, just like softirqs. The main difference between softirqs and tasklets +is that tasklets can be allocated dynamically and thus they can be used +by device drivers. A tasklet is represented by :c:type:`struct +tasklet_struct` and, like many other kernel structures, it needs to be +initialized before being used. A pre-initialized tasklet can be defined +as follows: + +.. code-block:: c + + void handler(unsigned long data); + + DECLARE_TASKLET(tasklet, handler, data); + DECLARE_TASKLET_DISABLED(tasklet, handler, data); + + +If we want to initialize the tasklet manually we can use the following +approach: + +.. code-block:: c + + void handler(unsigned long data); + + struct tasklet_struct tasklet; + + tasklet_init(&tasklet, handler, data); + +The *data* parameter will be sent to the handler when it is executed. + +Programming tasklets for running is called scheduling. Tasklets are +running from softirqs. Tasklets scheduling is done with: + +.. code-block:: c + + void tasklet_schedule(struct tasklet_struct *tasklet); + + void tasklet_hi_schedule(struct tasklet_struct *tasklet); + +When using *tasklet_schedule*, a *TASKLET_SOFTIRQ* softirq is +scheduled and all tasklets scheduled are run. For +*tasklet_hi_schedule*, a *HI_SOFTIRQ* softirq is scheduled. + +If a tasklet was scheduled multiple times and it did not run between +schedules, it will run once. Once the tasklet has run, it can be +re-scheduled, and will run again at a later time. Tasklets can be +re-scheduled from their handlers. + +Tasklets can be masked and the following functions can be used: + +.. 
code-block:: c + + void tasklet_enable(struct tasklet_struct * tasklet ); + void tasklet_disable(struct tasklet_struct * tasklet ); + +Remember that since tasklets are running from softirqs, blocking calls +can not be used in the handler function. + +Timers +------ + +A particular type of deferred work, very often used, are timers. They +are defined by :c:type:`struct timer_list`. They run in interrupt +context and are implemented on top of softirqs. + +To be used, a timer must first be initialized by calling :c:func:`timer_setup`: + +.. code-block:: c + + #include <linux/timer.h> + + void timer_setup(struct timer_list * timer, + void (*function)(struct timer_list *), + unsigned int flags); + +The above function initializes the internal fields of the structure +and associates *function* as the timer handler. Since timers are planned +over softirqs, blocking calls can not be used in the code associated +with the treatment function. + +Scheduling a timer is done with :c:func:`mod_timer`: + +.. code-block:: c + + int mod_timer(struct timer_list *timer, unsigned long expires); + +Where *expires* is the time (in the future) to run the handler +function. The function can be used to schedule or reschedule a timer. + +The time unit for timers is the *jiffy*. The absolute value of a jiffy +is dependent on the platform and it can be found using the +:c:macro:`HZ` macro that defines the number of jiffies for 1 second. To +convert between jiffies (*jiffies_value*) and seconds (*seconds_value*), +the following formulas are used: + +.. code-block:: c + + jiffies_value = seconds_value * HZ ; + seconds_value = jiffies_value / HZ ; + +The kernel maintains a counter that contains the number of jiffies +since the last boot, which can be accessed via the :c:macro:`jiffies` +global variable or macro. We can use it to calculate a time in the +future for timers: + +.. 
code-block:: c + + #include <linux/jiffies.h> + + unsigned long current_jiffies, next_jiffies; + unsigned long seconds = 1; + + current_jiffies = jiffies; + next_jiffies = jiffies + seconds * HZ; + +To stop a timer, use :c:func:`del_timer` and :c:func:`del_timer_sync`: + +.. code-block:: c + + int del_timer(struct timer_list *timer); + int del_timer_sync(struct timer_list *timer); + +These functions can be called for both a scheduled timer and an +unplanned timer. :c:func:`del_timer_sync` is used to eliminate the +races that can occur on multiprocessor systems, since at the end of +the call it is guaranteed that the timer processing function does not +run on any processor. + +A frequent mistake in using timers is that we forget to turn off +timers. For example, before removing a module, we must stop the timers +because if a timer expires after the module is removed, the handler +function will no longer be loaded into the kernel and a kernel oops +will be generated. + +The usual sequence used to initialize and schedule a one second +timeout is: + +.. code-block:: c + + #include <linux/timer.h> + + void timer_function(struct timer_list *); + + struct timer_list timer ; + unsigned long seconds = 1; + + timer_setup(&timer, timer_function, 0); + mod_timer(&timer, jiffies + seconds * HZ); + +And to stop it: + +.. code-block:: c + + del_timer_sync(&timer); + +Locking +------- + +For synchronization between code running in process context (A) and +code running in softirq context (B) we need to use special locking +primitives. We must use spinlock operations augmented with +deactivation of bottom-half handlers on the current processor in (A), +and in (B) only basic spinlock operations. Using spinlocks makes sure +that we don't have races between multiple CPUs while deactivating the +softirqs makes sure that we don't deadlock if the softirq is scheduled +on the same CPU where we already acquired a spinlock. 
+ +We can use the :c:func:`local_bh_disable` and +:c:func:`local_bh_enable` to disable and enable softirqs handlers (and +since they run on top of softirqs also timers and tasklets): + +.. code-block:: c + + void local_bh_disable(void); + void local_bh_enable(void); + +Nested calls are allowed, the actual reactivation of the softirqs is +done only when all local_bh_disable() calls have been complemented by +local_bh_enable() calls: + +.. code-block:: c + + /* We assume that softirqs are enabled */ + local_bh_disable(); /* Softirqs are now disabled */ + local_bh_disable(); /* Softirqs remain disabled */ + + local_bh_enable(); /* Softirqs remain disabled */ + local_bh_enable(); /* Softirqs are now enabled */ + +.. attention:: These above calls will disable the softirqs only on the + local processor and they are usually not safe to use, they must be + complemented with spinlocks. + + +Most of the time device drivers will use special versions of spinlocks +calls for synchronization like :c:func:`spin_lock_bh` and +:c:func:`spin_unlock_bh`: + +.. code-block:: c + + void spin_lock_bh(spinlock_t *lock); + void spin_unlock_bh(spinlock_t *lock); + + +Workqueues +========== + +Workqueues are used to schedule actions to run in process context. The +base unit with which they work is called work. There are two types of +work: + +* :c:type:`struct work_struct` - it schedules a task to run at + a later time +* :c:type:`struct delayed_work` - it schedules a task to run after at + least a given time interval + +A delayed work uses a timer to run after the specified time +interval. The calls with this type of work are similar to those for +:c:type:`struct work_struct`, but has **_delayed** in the functions +names. + +Before using them a work item must be initialized. There are two types +of macros that can be used, one that declares and initializes the work +item at the same time and one that only initializes the work item (and +the declaration must be done separately): + +.. 
code-block:: c + + #include <linux/workqueue.h> + + DECLARE_WORK(name , void (*function)(struct work_struct *)); + DECLARE_DELAYED_WORK(name, void(*function)(struct work_struct *)); + + INIT_WORK(struct work_struct *work, void(*function)(struct work_struct *)); + INIT_DELAYED_WORK(struct delayed_work *work, void(*function)(struct work_struct *)); + +:c:func:`DECLARE_WORK` and :c:func:`DECLARE_DELAYED_WORK` declare and +initialize a work item, and :c:func:`INIT_WORK` and +:c:func:`INIT_DELAYED_WORK` initialize an already declared work item. + +The following sequence declares and initializes a work item: + +.. code-block:: c + + #include <linux/workqueue.h> + + void my_work_handler(struct work_struct *work); + + DECLARE_WORK(my_work, my_work_handler); + +Or, if we want to initialize the work item separately: + +.. code-block:: c + + void my_work_handler(struct work_struct * work); + + struct work_struct my_work; + + INIT_WORK(&my_work, my_work_handler); + +Once declared and initialized, we can schedule the task using +:c:func:`schedule_work` and :c:func:`schedule_delayed_work`: + +.. code-block:: c + + schedule_work(struct work_struct *work); + + schedule_delayed_work(struct delayed_work *work, unsigned long delay); + +:c:func:`schedule_delayed_work` can be used to plan a work item for +execution with a given delay. The delay time unit is jiffies. + +Work items can not be masked but they can be canceled by calling +:c:func:`cancel_delayed_work_sync` or :c:func:`cancel_work_sync`: + +.. code-block:: c + + int cancel_work_sync(struct work_struct *work); + int cancel_delayed_work_sync(struct delayed_work *work); + +The call only stops the subsequent execution of the work item. If the +work item is already running at the time of the call, it will continue +to run. In any case, when these calls return, it is guaranteed that +the task will no longer run. + +.. attention:: While there are versions of these functions that are + not synchronous (e.g. 
:c:func:`cancel_work`) do not + use them when you are performing cleanup work otherwise + race condition could occur. + +We can wait for a workqueue to complete running all of its work items by calling :c:func:`flush_scheduled_work`: + +.. code-block:: c + + void flush_scheduled_work(void); + +This function is blocking and, therefore, can not be used in interrupt +context. The function will wait for all work items to be completed. +For delayed work items, :c:type:`cancel_delayed_work` must be called +before :c:func:`flush_scheduled_work`. + +Finally, the following functions can be used to schedule work items on +a particular processor (:c:func:`schedule_delayed_work_on`), or on all +processors (:c:func:`schedule_on_each_cpu`): + +.. code-block:: c + + int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); + int schedule_on_each_cpu(void(*function)(struct work_struct *)); + +A usual sequence to initialize and schedule a work item is the following: + +.. code-block:: c + + void my_work_handler(struct work_struct *work); + + struct work_struct my_work; + + INIT_WORK(&my_work, my_work_handler); + + schedule_work(&my_work); + +And for waiting for termination of a work item: + +.. code-block:: c + + flush_scheduled_work(); + +As you can see, the *my_work_handler* function receives the task as +the parameter. To be able to access the module's private data, you can +use :c:func:`container_of`: + +.. code-block:: c + + struct my_device_data { + struct work_struct my_work; + // ... + }; + + void my_work_handler(struct work_struct *work) + { + struct my_device_data * my_data; + + my_data = container_of(work, struct my_device_data, my_work); + // ... + } + +Scheduling work items with the functions above will run the handler in +the context of a kernel thread called *events/x*, where x is the +processor number. The kernel will initialize a kernel thread (or a +pool of workers) for each processor present in the system: + +.. 
code-block:: shell + + $ ps -e + PID TTY TIME CMD + 1 ? 00:00:00 init + 2 ? 00:00:00 ksoftirqd/0 + 3 ? 00:00:00 events/0 <--- kernel thread that runs work items + 4 ? 00:00:00 khelper + 5 ? 00:00:00 kthread + 7 ? 00:00:00 kblockd/0 + 8 ? 00:00:00 kacpid + +The above functions use a predefined workqueue (called events), and +they run in the context of the *events/x* thread, as noted +above. Although this is sufficient in most cases, it is a shared +resource and large delays in work item handlers can cause delays for +other queue users. For this reason there are functions for creating +additional queues. + +A workqueue is represented by :c:type:`struct workqueue_struct`. A new +workqueue can be created with these functions: + +.. code-block:: c + + struct workqueue_struct *create_workqueue(const char *name); + struct workqueue_struct *create_singlethread_workqueue(const char *name); + +:c:func:`create_workqueue` uses one thread for each processor in the +system, and :c:func:`create_singlethread_workqueue` uses a single +thread. + +To add a task in the new queue, use :c:func:`queue_work` or +:c:func:`queue_delayed_work`: + +.. code-block:: c + + int queue_work(struct workqueue_struct * queue, struct work_struct *work); + + int queue_delayed_work(struct workqueue_struct *queue, + struct delayed_work * work , unsigned long delay); + +:c:func:`queue_delayed_work` can be used to plan a work item for execution +with a given delay. The time unit for the delay is jiffies. + +To wait for all work items to finish, call :c:func:`flush_workqueue`: + +.. code-block:: c + + void flush_workqueue(struct workqueue_struct * queue); + +And to destroy the workqueue call :c:func:`destroy_workqueue`: + +.. code-block:: c + + void destroy_workqueue(struct workqueue_struct *queue); + +The next sequence declares and initializes an additional workqueue, +declares and initializes a work item and adds it to the queue: + +.. 
code-block:: c + + void my_work_handler(struct work_struct *work); + + struct work_struct my_work; + struct workqueue_struct * my_workqueue; + + my_workqueue = create_singlethread_workqueue("my_workqueue"); + INIT_WORK(&my_work, my_work_handler); + + queue_work(my_workqueue, &my_work); + +And the next code sample shows how to remove the workqueue: + +.. code-block:: c + + flush_workqueue(my_workqueue); + destroy_workqueue(my_workqueue); + +The work items planned with these functions will run in the context of +a new kernel thread called *my_workqueue*, the name passed to +:c:func:`create_singlethread_workqueue`. + +Kernel threads +============== + +Kernel threads have emerged from the need to run kernel code in +process context. Kernel threads are the basis of the workqueue +mechanism. Essentially, a kernel thread is a thread that only runs in +kernel mode and has no user address space or other user attributes. + +To create a kernel thread, use :c:func:`kthread_create`: + +.. code-block:: c + + #include <linux/kthread.h> + + struct task_struct *kthread_create(int (*threadfn)(void *data), + void *data, const char namefmt[], ...); + +* *threadfn* is a function that will be run by the kernel thread +* *data* is a parameter to be sent to the function +* *namefmt* represents the kernel thread name, as it is displayed in + ps/top; it can contain sequences such as %d, %s etc., which will be replaced + according to the standard printf syntax. + +For example, the following call: + +.. code-block:: c + + kthread_create (f, NULL, "%skthread%d", "my", 0); + +Will create a kernel thread with the name mykthread0. + +The kernel thread created with this function will be stopped (in the +*TASK_INTERRUPTIBLE* state). To start the kernel thread, call the +:c:func:`wake_up_process`: + +.. code-block:: c + + #include <linux/sched.h> + + int wake_up_process(struct task_struct *p); + +Alternatively, you can use :c:func:`kthread_run` to create and run a +kernel thread: + +.. 
code-block:: c + + struct task_struct * kthread_run(int (*threadfn)(void *data), + void *data, const char namefmt[], ...); + +Even if the programming restrictions for the function running within +the kernel thread are more relaxed and scheduling is closer to +scheduling in userspace, there are, however, some limitations to be +taken into account. We will list below the actions that can or can not +be made from a kernel thread: + +* can't access the user address space (even with copy_from_user, + copy_to_user) because a kernel thread does not have a user address + space +* can't implement busy wait code that runs for a long time; if the + kernel is compiled without the preemptive option, that code will run + without being preempted by other kernel threads or user processes + thus hogging the system +* can call blocking operations +* can use spinlocks, but if the hold time of the lock is significant, + it is recommended to use mutexes + +The termination of a kernel thread is done voluntarily, within the +function running in the kernel thread, by calling :c:func:`do_exit`: + +.. code-block:: c + + fastcall NORET_TYPE void do_exit(long code); + +Most of the implementations of kernel thread handlers use the same +model and it is recommended to start using the same model to avoid +common mistakes: + +.. code-block:: c + + #include <linux/kthread.h> + + DECLARE_WAIT_QUEUE_HEAD(wq); + + // list of events to be processed by kernel thread + struct list_head events_list; + spinlock_t events_lock; + + + // structure describing the event to be processed + struct event { + struct list_head lh; + bool stop; + //... 
+ }; + + struct event* get_next_event(void) + { + struct event *e; + + spin_lock(&events_lock); + e = list_first_entry_or_null(&events_list, struct event, lh); + if (e) + list_del(&e->lh); + spin_unlock(&events_lock); + + return e; + } + + int my_thread_f(void *data) + { + struct event *e; + + while (true) { + wait_event(wq, (e = get_next_event())); + + /* Event processing */ + + if (e->stop) + break; + } + + do_exit(0); + } + + /* create and start kthread */ + kthread_run(my_thread_f, NULL, "%skthread%d", "my", 0); + + +With the template above, the kernel thread requests can be issued +with: + +.. code-block:: c + + void send_event(struct event *ev) + { + spin_lock(&events_lock); + list_add(&ev->lh, &events_list); + spin_unlock(&events_lock); + wake_up(&wq); + } + +Further reading +=============== + +* `Linux Device Drivers, 3rd ed., Ch. 7: Time, Delays, and Deferred Work `_ +* `Scheduling Tasks `_ +* `Driver porting: the workqueue interface `_ +* `Workqueues get a rework `_ +* `Kernel threads made easy `_ +* `Unreliable Guide to Locking `_ + +Exercises +========= + +.. include:: exercises-summary.hrst +.. |LAB_NAME| replace:: deferred_work + +0. Intro +-------- + +Using |LXR|_, find the definitions of the following symbols: + +* :c:macro:`jiffies` +* :c:type:`struct timer_list` +* :c:func:`spin_lock_bh` + + +1. Timer +-------- + +We're looking at creating a simple kernel module that displays a +message *TIMER_TIMEOUT* seconds after the module is loaded into the kernel. + +Generate the skeleton for the task named **1-2-timer** and follow the +sections marked with **TODO 1** to complete the task. + +.. hint:: Use `pr_info(...)`. Messages will be displayed on the + console and can also be viewed using dmesg. When scheduling + the timer we need to use the absolute time of the system (in + the future) in number of ticks. The current time of the + system in the number of ticks is given by :c:macro:`jiffies`. 
+ Thus the absolute time we need to pass to the timer is + ``jiffies + TIMER_TIMEOUT * HZ``. + + For more information review the `Timers`_ section. + + +2. Periodic timer +----------------- + +Modify the previous module to display the message once every +TIMER_TIMEOUT seconds. Follow the section marked with **TODO 2** in the +skeleton. + +3. Timer control using ioctl +---------------------------- + +We plan to display information about the current process N +seconds after receiving an ioctl call from user space. N is transmitted as +the ioctl parameter. + +Generate the skeleton for the task named **3-4-5-deferred** and +follow the sections marked with **TODO 1** in the skeleton driver. + +You will need to implement the following ioctl operations. + +* MY_IOCTL_TIMER_SET to schedule a timer to run after a number of + seconds which is received as an argument to ioctl. The timer does + not run periodically. + * This command receives a value directly, not a pointer. + +* MY_IOCTL_TIMER_CANCEL to deactivate the timer. + +.. note:: Review :ref:`ioctl` for a way to access the ioctl argument. + +.. note:: Review the `Timers`_ section for information on enabling / + disabling a timer. In the timer handler, display the current + process identifier (PID) and the process executable image name. + +.. hint:: You can find the current process identifier using the *pid* + and *comm* fields of the current process. For details, + review :ref:`proc-info`. + +.. hint:: To use the device driver from userspace you must create the + device character file */dev/deferred* using the mknod + utility. Alternatively, you can run the + *3-4-5-deferred/kernel/makenode* script that performs this + operation. + +Enable and disable the timer by calling user-space ioctl +operations. Use the *3-4-5-deferred/user/test* program to test +planning and canceling of the timer. The program receives the ioctl +type operation and its parameters (if any) on the command line. + +.. 
hint:: Run the test executable without arguments to observe the + command line options it accepts. + + To enable the timer after 3 seconds use: + + .. code-block:: shell + + ./test s 3 + + To disable the timer use: + + .. code-block:: shell + + ./test c + + +Note that every time, the current process the timer runs from is +*swapper/0* with PID 0. This process is the idle process. It is +running when there is nothing else to run. Because the virtual +machine is very light and does not do much it is natural to see this +process most of the time. + +4. Blocking operations +---------------------- + +Next we want to see what happens when we perform blocking operations +in a timer routine. For this we try to call in the timer-handling +routines a function called alloc_io() that simulates a blocking +operation. + +Modify the module so that when you receive the *MY_IOCTL_TIMER_ALLOC* +command the timer handler will call :c:func:`alloc_io`. Follow the +sections marked with **TODO 2** in the skeleton. + +Use the same timer. To differentiate functionality in the timer +handler, use a flag in the device structure. Use the +*TIMER_TYPE_ALLOC* and *TIMER_TYPE_SET* macros defined in the code +skeleton. For initialization, use TIMER_TYPE_NONE. + +Run the test program to verify the functionality of task 3. Run the +test program again to call :c:func:`alloc_io`. + +.. note:: The driver causes an error because a blocking function is + called in the atomic context (the timer handler runs in + interrupt context). + +5. Workqueues +------------- + +We will modify the module to prevent the error observed in the +previous task. + +To do so, let's call :c:func:`alloc_io` using workqueues. Schedule a +work item from the timer handler. In the work handler (running in +process context) call the :c:func:`alloc_io`. Follow the sections +marked with **TODO 3** in the skeleton and review the `Workqueues`_ +section if needed. + +.. 
hint:: Add a new field with the type :c:type:`struct work_struct` + in your device structure. Initialize this field. Schedule + the work from the timer handler using :c:func:`schedule_work`. + Schedule the timer handler after N seconds from the ioctl. + +6. Kernel thread +---------------- + +Implement a simple module that creates a kernel thread that shows the +current process identifier. + +Generate the skeleton for the task named **6-kthread** and follow the +TODOs from the skeleton. + + +.. note:: There are two options for creating and running a thread: + + * :c:func:`kthread_run` to create and run the thread + + * :c:func:`kthread_create` to create a suspended thread and + then start it running with :c:func:`wake_up_process`. + + Review the `Kernel Threads`_ section if needed. + +.. attention:: Synchronize the thread termination with module unloading: + + * The thread should finish when the module is unloaded + + * Wait for the kernel thread to exit before continuing + with unloading + + +.. hint:: For synchronization use two wait queues and two flags. + + Review :ref:`waiting-queues` on how to use waiting queues. + + Use atomic variables for flags. Review :ref:`atomic-variables`. + + +7. Buffer shared between timer and process +------------------------------------------ + +The purpose of this task is to exercise the synchronization between a +deferrable action (a timer) and process context. Set up a periodic +timer that monitors a list of processes. If one of the processes +terminates, a message is printed. Processes can be dynamically added to +the list. Use the *3-4-5-deferred/kernel/* skeleton as a base and +follow the **TODO 4** markings to complete the task. + +When the *MY_IOCTL_TIMER_MON* command is received check that the given +process exists and if so add it to the monitored list of +processes and then arm the timer after setting its type. + +.. 
hint:: Use :c:func:`get_proc` which checks the pid, finds the + associated :c:type:`struct task_struct` and allocates a + :c:type:`struct mon_proc` item you can add to your + list. Note that the function also increases the reference + counter of the task, so that its memory won't be freed when + the task terminates. + +.. attention:: Use a spinlock to protect the access to the list. Note + that since we share data with the timer handler we need + to disable bottom-half handlers in addition to taking + the lock. Review the `Locking`_ section. + +.. hint:: Collect the information every second from a timer. Use the + existing timer and add new behaviour for it via the + TIMER_TYPE_ACCT. To set the flag, use the *t* argument of + the test program. + + +In the timer handler iterate over the list of monitored processes and +check if they have terminated. If so, print the process name and pid +then remove the process from the list, decrement the task usage +counter so that its memory can be freed and finally free the +:c:type:`struct mon_proc` structure. + +.. hint:: Use the *state* field of :c:type:`struct task_struct`. A + task has terminated if its state is *TASK_DEAD*. + +.. hint:: Use :c:func:`put_task_struct` to decrement the task usage + counter. + +.. attention:: Make sure you protect the list access with a + spinlock. The simple variant will suffice. + +.. attention:: Make sure to use the safe iteration over the list since + we may need to remove an item from the list. + +Rearm the timer after checking the list. 
diff --git a/Documentation/teaching/labs/device_drivers.rst b/Documentation/teaching/labs/device_drivers.rst new file mode 100644 index 00000000000000..e072944159134f --- /dev/null +++ b/Documentation/teaching/labs/device_drivers.rst @@ -0,0 +1,1038 @@ +======================== +Character device drivers +======================== + +Laboratory objectives +===================== + + * understand the concepts behind character device driver + * understand the various operations that can be performed on character devices + * working with waiting queues + +Overview +======== + +In UNIX, hardware devices are accessed by the user through special device +files. These files are grouped into the /dev directory, and system calls +``open``, ``read``, ``write``, ``close``, ``lseek``, ``mmap`` etc. are +redirected by the operating system to the device driver associated with the +physical device. The device driver is a kernel component (usually a module) +that interacts with a hardware device. + +In the UNIX world there are two categories of device files and thus +device drivers: character and block. This division is done by the speed, +volume and way of organizing the data to be transferred from the device to the +system and vice versa. In the first category, there are slow devices, which +manage a small amount of data, and access to data does not require frequent +seek queries. Examples are devices such as keyboard, mouse, serial ports, +sound card, joystick. In general, operations with these devices (read, write) +are performed sequentially byte by byte. The second category includes devices +where data volume is large, data is organized on blocks, and search is common. +Examples of devices that fall into this category are hard drives, cdroms, ram +disks, magnetic tape drives. For these devices, reading and writing is done at +the data block level. + +For the two types of device drivers, the Linux kernel offers different APIs. 
+If for character devices system calls go directly to device drivers, in case of +block devices, the drivers do not work directly with system calls. In +the case of block devices, communication between the user-space and the block +device driver is mediated by the file management subsystem and the block device +subsystem. The role of these subsystems is to prepare the device driver's +necessary resources (buffers), to keep the recently read data in the cache +buffer, and to order the read and write operations for performance reasons. + +Majors and minors +================= + +In UNIX, the devices traditionally had a unique, fixed identifier associated +with them. This tradition is preserved in Linux, although identifiers can be +dynamically allocated (for compatibility reasons, most drivers still use static +identifiers). The identifier consists of two parts: major and minor. The first +part identifies the device type (IDE disk, SCSI disk, serial port, etc.) +and the second one identifies the device (first disk, second serial port, +etc.). Most times, the major identifies the driver, while the minor identifies +each physical device served by the driver. In general, a driver will have a +major associate and will be responsible for all minors associated with that +major. + +.. code-block:: bash + + $ ls -la /dev/hda? /dev/ttyS? + brw-rw---- 1 root disk 3, 1 2004-09-18 14:51 /dev/hda1 + brw-rw---- 1 root disk 3, 2 2004-09-18 14:51 /dev/hda2 + crw-rw---- 1 root dialout 4, 64 2004-09-18 14:52 /dev/ttyS0 + crw-rw---- 1 root dialout 4, 65 2004-09-18 14:52 /dev/ttyS1 + +As can be seen from the example above, device-type information can be found +using the ls command. The special character files are identified by the ``c`` +character in the first column of the command output, and the block type by the +character ``b``. In columns ``5`` and ``6`` of the result you can see the +major, respectively the minor for each device. 
+ +Certain major identifiers are statically assigned to devices (in the +``Documentation/admin-guide/devices.txt`` file from the kernel sources). When choosing the +identifier for a new device, you can use two methods: static (choose a number +that does not seem to be used already) or dynamically. In /proc/devices are the +loaded devices, along with the major identifier. + +To create a device type file, use the ``mknod`` command; the command receives the +type (``block`` or ``character``), ``major`` and ``minor`` of the device +(``mknod name type major minor``). Thus, if you want to create a character device +named ``mycdev`` with the major ``42`` and minor ``0``, use the command: + +.. code-block:: bash + + # mknod /dev/mycdev c 42 0 + +To create the block device with the name ``mybdev`` with the major 240 and minor 0 +the command will be: + +.. code-block:: bash + + # mknod /dev/mybdev b 240 0 + +Next, we'll refer to character devices as drivers. + +Data structures for a character device +====================================== + +In the kernel, a character-type device is represented by +:c:type:`struct cdev `, a structure used to register it in the +system. Most driver operations use three important structures: +``struct file_operations``, ``struct file`` and ``struct inode``. + +:c:type:`struct file_operations` +-------------------------------- + +As mentioned above, the character device drivers receive unaltered system calls +made by users over device-type files. Consequently, implementation of a character +device driver means implementing the system calls specific to files: ``open``, +``close``, ``read``, ``write``, ``lseek``, ``mmap``, etc. These operations are +described in the fields of the ``struct file_operations`` structure: + +.. 
code-block:: c + + #include + + struct file_operations { + struct module *owner; + loff_t (*llseek) (struct file *, loff_t, int); + ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); + ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); + [...] + long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); + [...] + int (*open) (struct inode *, struct file *); + int (*flush) (struct file *, fl_owner_t id); + int (*release) (struct inode *, struct file *); + [...] + +.. ** + +It can be noticed that the signature of the function differs from the system +call that the user uses. The operating system sits between the user and +the device driver to simplify implementation in the device driver. + +``open`` does not receive the parameter path or the various parameters that control +the file opening mode. Similarly, ``read``, ``write``, ``release``, ``ioctl``, ``lseek`` +do not receive as a parameter a file descriptor. Instead, these routines receive as +parameters two structures: ``file`` and ``inode``. Both structures represent a file, +but from different perspectives. + +Most parameters for the presented operations have a direct meaning: + * ``file`` and ``inode`` identifies the device type file; + * ``size`` is the number of bytes to be read or written; + * ``offset`` is the displacement to be read or written (to be updated + accordingly); + * ``user_buffer`` user buffer from which it reads / writes; + * ``whence`` is the way to seek (the position where the search operation starts); + * ``cmd`` and ``arg`` are the parameters sent by the users to the ioctl call (IO + control). + +``inode`` and ``file`` structures +--------------------------------- + +An ``inode`` represents a file from the point of view of the file system. Attributes +of an inode are the size, rights, times associated with the file. An inode uniquely +identifies a file in a file system. 
+ +The ``file`` structure is still a file, but closer to the user's point of view. +From the attributes of the file structure we list: the inode, the file name, +the file opening attributes, the file position. All open files at a given time +have an associated ``file`` structure. + +To understand the differences between inode and file, we will use an analogy +from object-oriented programming: if we consider a class inode, then the files +are objects, that is, instances of the inode class. Inode represents the static +image of the file (the inode has no state), while the file represents the +dynamic image of the file (the file has state). + +Returning to device drivers, the two entities almost always have standard +uses: the inode is used to determine the major and minor of the device on +which the operation is performed, and the file is used to determine the flags +with which the file was opened, but also to save and access (later) private +data. + +The file structure contains, among many other fields: + + * ``f_mode``, which specifies read (``FMODE_READ``) or write + (``FMODE_WRITE``); + * ``f_flags``, which specifies the file opening flags (``O_RDONLY``, + ``O_NONBLOCK``, ``O_SYNC``, ``O_APPEND``, ``O_TRUNC``, etc.); + * ``f_op``, which specifies the operations associated with the file (pointer to + the ``file_operations`` structure); + * ``private_data``, a pointer that can be used by the programmer to store + device-specific data; the pointer will be initialized to a memory location + assigned by the programmer. + * ``f_pos``, the offset within the file + +The inode structure contains, among much other information, an ``i_cdev`` +field, which is a pointer to the structure that defines the character +device (when the inode corresponds to a character device). 
+ +Implementation of operations +============================ + +To implement a device driver, it is recommended that you create a structure +that contains information about the device, information used in the module. In +the case of a driver for a character device, the structure will contain a cdev +structure field to refer to the device. The following example uses the struct +my_device_data: + +.. code-block:: c + + #include + #include + + struct my_device_data { + struct cdev cdev; + /* my data starts here */ + //... + }; + + static int my_open(struct inode *inode, struct file *file) + { + struct my_device_data *my_data; + + my_data = container_of(inode->i_cdev, struct my_device_data, cdev); + + file->private_data = my_data; + //... + } + + static int my_read(struct file *file, char __user *user_buffer, size_t size, loff_t *offset) + { + struct my_device_data *my_data; + + my_data = (struct my_device_data *) file->private_data; + + //... + } + +.. ** + +A structure like ``my_device_data`` will contain the data associated with a device. +The ``cdev`` field (``cdev`` type) is a character-type device and is used to record it +in the system and identify the device. The pointer to the ``cdev`` member can be +found using the ``i_cdev`` field of the ``inode`` structure (using the ``container_of`` +macro). In the private_data field of the file structure, information can be +stored at open which is then available in the ``read``, ``write``, ``release``, etc. +routines. + +Registration and unregistration of character devices +==================================================== + +The registration/unregistration of a device is made by specifying the major and +minor. The ``dev_t`` type is used to keep the identifiers of a device (both major +and minor) and can be obtained using the ``MKDEV`` macro. + +For the static assignment and unallocation of device identifiers, the +``register_chrdev_region`` and ``unregister_chrdev_region`` functions are used: + +.. 
code-block:: c + + #include + + int register_chrdev_region(dev_t first, unsigned int count, char *name); + void unregister_chrdev_region(dev_t first, unsigned int count); + +.. ** + +It is recommended that device identifiers be dynamically assigned to the +``alloc_chrdev_region`` function. + +The ``my_minor_count`` sequence reserves my_minor_count devices, starting with +``my_major`` major and my_first_minor minor (if the max value for minor is +exceeded, move to the next major): + +.. code-block:: c + + #include + ... + + err = register_chrdev_region(MKDEV(my_major, my_first_minor), my_minor_count, + "my_device_driver"); + if (err != 0) { + /* report error */ + return err; + } + ... + +.. ** + +After assigning the identifiers, the character device will have to be +initialized (``cdev_init``) and the kernel will have to be notified(``cdev_add``). The +``cdev_add`` function must be called only after the device is ready to receive +calls. Removing a device is done using the ``cdev_del`` function. + +.. code-block:: c + + #include + + void cdev_init(struct cdev *cdev, struct file_operations *fops); + int cdev_add(struct cdev *dev, dev_t num, unsigned int count); + void cdev_del(struct cdev *dev); + +.. ** + +The following sequence registers and initializes MY_MAX_MINORS devices: + +.. code-block:: c + + #include + #include + + #define MY_MAJOR 42 + #define MY_MAX_MINORS 5 + + struct my_device_data { + struct cdev cdev; + /* my data starts here */ + //... 
+ }; + + struct my_device_data devs[MY_MAX_MINORS]; + + const struct file_operations my_fops = { + .owner = THIS_MODULE, + .open = my_open, + .read = my_read, + .write = my_write, + .release = my_release, + .unlocked_ioctl = my_ioctl + }; + + int init_module(void) + { + int i, err; + + err = register_chrdev_region(MKDEV(MY_MAJOR, 0), MY_MAX_MINORS, + "my_device_driver"); + if (err != 0) { + /* report error */ + return err; + } + + for(i = 0; i < MY_MAX_MINORS; i++) { + /* initialize devs[i] fields */ + cdev_init(&devs[i].cdev, &my_fops); + cdev_add(&devs[i].cdev, MKDEV(MY_MAJOR, i), 1); + } + + return 0; + } + +.. ** + +While the following sequence deletes and unregisters them: + +.. code-block:: c + + void cleanup_module(void) + { + int i; + + for(i = 0; i < MY_MAX_MINORS; i++) { + /* release devs[i] fields */ + cdev_del(&devs[i].cdev); + } + unregister_chrdev_region(MKDEV(MY_MAJOR, 0), MY_MAX_MINORS); + } + +.. ** + +.. note:: initialization of the struct my_fops used the initialization + of members by name, defined in C99 standard (see designated + initializers and the file_operations structure). Structure + members who do not explicitly appear in this initialization + will be set to the default value for their type. For + example, after the initialization above, ``my_fops.mmap`` will + be NULL. + +.. _access_to_process_address_space: + +Access to the address space of the process +========================================== + +A driver for a device is the interface between an application and hardware. As +a result, we often have to access a given user-space driver device. Accessing +process address space can not be done directly (by de-referencing a user-space +pointer). Direct access of a user-space pointer can lead to incorrect behavior +(depending on architecture, a user-space pointer may not be valid or mapped to +kernel-space), a kernel oops (the user-mode pointer can refer to a non-resident +memory area) or security issues. 
Proper access to user-space data is done by +calling the macros / functions below: + +.. code-block:: c + + #include + + put_user(type val, type *address); + get_user(type val, type *address); + unsigned long copy_to_user(void __user *to, const void *from, unsigned long n); + unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) + +.. ** + +All macros / functions return 0 in case of success and another value in case of +error and have the following roles: + + * ``put_user`` put in the user-space at the address address value of the val; + Type can be one on 8, 16, 32, 64 bit (the maximum supported type depends on the + hardware platform); + * ``get_user`` analogue to the previous function, only that val will be set to a + value identical to the value at the user-space address given by address; + * ``copy_to_user`` copies ``n`` bytes from the kernel-space, from the address + referenced by ``from`` in user-space to the address referenced by ``to``; + * ``copy_from_user`` copies ``n`` bytes from user-space from the address + referenced by ``from`` in kernel-space to the address referenced by ``to``. + +A common section of code that works with these functions is: + +.. code-block:: c + + #include + + /* + * Copy at most size bytes to user space. + * Return ''0'' on success and some other value on error. + */ + if (copy_to_user(user_buffer, kernel_buffer, size)) + return -EFAULT; + else + return 0; + +Open and release +================ + +The ``open`` function performs the initialization of a device. In most cases, +these operations refer to initializing the device and filling in specific data +(if it is the first open call). The release function is about releasing +device-specific resources: unlocking specific data and closing the device if +the last call is close. + +In most cases, the open function will have the following structure: + +.. 
code-block:: c + + static int my_open(struct inode *inode, struct file *file) + { + struct my_device_data *my_data = + container_of(inode->i_cdev, struct my_device_data, cdev); + + /* validate access to device */ + file->private_data = my_data; + + /* initialize device */ + ... + + return 0; + } + +.. ** + +A problem that occurs when implementing the ``open`` function is access control. +Sometimes a device must be opened by only one user at a time; more specifically, a +second open must not be allowed before the release. To implement this restriction, you +choose a way to handle an open call for an already open device: it can return +an error (``-EBUSY``), block open calls until a release operation, or shut down +the device before doing the open. + +When user space calls the open and close functions on the device, +my_open and my_release are called in the driver. An example of a user-space call: + +.. code-block:: c + + int fd = open("/dev/my_device", O_RDONLY); + if (fd < 0) { + /* handle error */ + } + + /* do work */ + //.. + + close(fd); + +.. ** + +Read and write +============== + +The read and write operations reach the device driver as a +result of a userspace program calling the read or write system calls: + +.. code-block:: c + + if (read(fd, buffer, size) < 0) { + /* handle error */ + } + + if (write(fd, buffer, size) < 0) { + /* handle error */ + } + +.. ** + +The ``read`` and ``write`` functions transfer data between the device and the +user-space: the read function reads the data from the device and transfers it +to the user-space, while writing reads the user-space data and writes it to the +device. The buffer received as a parameter is a user-space pointer, which is +why it is necessary to use the ``copy_to_user`` or ``copy_from_user`` functions. 
+ +The value returned by read or write can be: + + * the number of bytes transferred; if the returned value is less than the size + parameter (the number of bytes requested), then it means that a partial + transfer was made. Most of the time, the user-space app calls the system call + (read or write) function until the required data number is transferred. + * 0 to mark the end of the file in the case of read ; if write returns the + value 0 then it means that no byte has been written and that no error has + occurred; In this case, the user-space application retries the write call. + * a negative value indicating an error code. + +To perform a data transfer consisting of several partial transfers, the +following operations should be performed: + + * transfer the maximum number of possible bytes between the buffer received + as a parameter and the device (writing to the device/reading from the device + will be done from the offset received as a parameter); + * update the offset received as a parameter to the position from which the + next read / write data will begin; + * return the number of bytes transferred. + +The sequence below shows an example for the read function that takes +into account the internal buffer size, user buffer size and the offset: + +.. code-block:: c + + static int my_read(struct file *file, char __user *user_buffer, + size_t size, loff_t *offset) + { + struct my_device_data *my_data = (struct my_device_data *) file->private_data; + ssize_t len = min(my_data->size - *offset, size); + + if (len <= 0) + return 0; + + /* read data from my_data->buffer to user buffer */ + if (copy_to_user(user_buffer, my_data->buffer + *offset, len)) + return -EFAULT; + + *offset += len; + return len; + } + +.. ** + +The images below illustrate the read operation and how data is +transferred between the userspace and the driver: + + 1. 
when the driver has enough data available (starting with the OFFSET + position) to accurately transfer the required size (SIZE) to the user. + 2. when a smaller amount is transferred than required. + +.. image:: read.png + :width: 49 % +.. image:: read2.png + :width: 49 % + +We can look at the read operation implemented by the driver as a response to a +userspace read request. In this case, the driver is responsible for advancing +the offset according to how much it reads and returning the read size (which +may be less than what is required). + +The structure of the write function is similar: + +.. code-block:: c + + static int my_write(struct file *file, const char __user *user_buffer, + size_t size, loff_t * offset) + { + struct my_device_data *my_data = (struct my_device_data *) file->private_data; + ssize_t len = min(my_data->size - *offset, size); + + if (len <= 0) + return 0; + + /* read data from user buffer to my_data->buffer */ + if (copy_from_user(my_data->buffer + *offset, user_buffer, len)) + return -EFAULT; + + *offset += len; + return len; + } + +.. ** + +The write operation will respond to a write request from userspace. In +this case, depending on the maximum driver capacity (MAXSIZ), it can +write more or less than the required size. + +.. image:: write.png + :width: 49 % +.. image:: write2.png + :width: 49 % + +.. _ioctl: + +ioctl +===== + +In addition to read and write operations, a driver needs the ability to perform +certain physical device control tasks. These operations are accomplished by +implementing an ``ioctl`` function. Initially, the ioctl system call used the Big Kernel +Lock. That's why the call was gradually replaced with its unlocked version +called ``unlocked_ioctl``. You can read more on LWN: +http://lwn.net/Articles/119652/ + +.. code-block:: c + + static long my_ioctl (struct file *file, unsigned int cmd, unsigned long arg); + +.. ** + +``cmd`` is the command sent from user-space. 
If a value is being sent to the +user-space call, it can be accessed directly. If a buffer is fetched, the arg +value will be a pointer to it, and must be accessed through the ``copy_to_user`` +or ``copy_from_user``. + +Before implementing the ``ioctl`` function, the numbers corresponding to the +commands must be chosen. One method is to choose consecutive numbers starting +at 0, but it is recommended to use ``_IOC(dir, type, nr, size)`` macrodefinition +to generate ioctl codes. The macrodefinition parameters are as follows: + + * ``dir`` represents the data transfer (``_IOC_NONE`` , ``_IOC_READ``, + ``_IOC_WRITE``). + * ``type`` represents the magic number (``Documentation/ioctl/ioctl-number.txt``); + * ``nr`` is the ioctl code for the device; + * ``size`` is the size of the transferred data. + +The following example shows an implementation for a ``ioctl`` function: + +.. code-block:: c + + #include + + #define MY_IOCTL_IN _IOC(_IOC_WRITE, 'k', 1, sizeof(my_ioctl_data)) + + static long my_ioctl (struct file *file, unsigned int cmd, unsigned long arg) + { + struct my_device_data *my_data = + (struct my_device_data*) file->private_data; + my_ioctl_data mid; + + switch(cmd) { + case MY_IOCTL_IN: + if( copy_from_user(&mid, (my_ioctl_data *) arg, + sizeof(my_ioctl_data)) ) + return -EFAULT; + + /* process data and execute command */ + + break; + default: + return -ENOTTY; + } + + return 0; + } + +.. ** + +At the user-space call for the ioctl function, the my_ioctl function of the +driver will be called. An example of such a user-space call: + +.. code-block:: c + + if (ioctl(fd, MY_IOCTL_IN, buffer) < 0) { + /* handle error */ + } + +.. ** + +Waiting queues +============== + +It is often necessary for a thread to wait for an operation to finish, +but it is desirable that this wait is not busy-waiting. Using waiting +queues we can block a thread until an event occurs. 
When the condition +is satisfied, elsewhere in the kernel, in another process, in an +interrupt or deferrable work, we will wake-up the process. + +A waiting queue is a list of processes that are waiting for a specific +event. A queue is defined with the ``wait_queue_head_t`` type and can +be used by the functions/macros: + +.. code-block:: c + + #include + + DECLARE_WAIT_QUEUE_HEAD(wq_name); + + void init_waitqueue_head(wait_queue_head_t *q); + + int wait_event(wait_queue_head_t q, int condition); + + int wait_event_interruptible(wait_queue_head_t q, int condition); + + int wait_event_timeout(wait_queue_head_t q, int condition, int timeout); + + int wait_event_interruptible_timeout(wait_queue_head_t q, int condition, int timeout); + + void wake_up(wait_queue_head_t *q); + + void wake_up_interruptible(wait_queue_head_t *q); + +.. ** + +The roles of the macros / functions above are: + + * :c:func:`init_waitqueue_head` initializes the queue; to initialize the + queue at compile time, you can use the :c:macro:`DECLARE_WAIT_QUEUE_HEAD` macro; + * :c:func:`wait_event` and :c:func:`wait_event_interruptible` adds the current thread to the + queue while the condition is false, sets it to TASK_UNINTERRUPTIBLE or + TASK_INTERRUPTIBLE and calls the scheduler to schedule a new thread; Waiting + will be interrupted when another thread will call the wake_up function; + * :c:func:`wait_event_timeout` and :c:func:`wait_event_interruptible_timeout` have the same + effect as the above functions, only waiting can be interrupted at the end of + the timeout received as a parameter; + * :c:func:`wake_up` puts all threads off from state TASK_INTERRUPTIBLE and + TASK_UNINTERRUPTIBLE in TASK_RUNNING status; Remove these threads from the + queue; + * :c:func:`wake_up_interruptible` same action, but only threads with TASK_INTERRUPTIBLE + status are woken up. + +A simple example is that of a thread waiting to change the value of a flag. The +initializations are done by the sequence: + +.. 
code-block:: c + + #include + + wait_queue_head_t wq; + int flag = 0; + + init_waitqueue_head(&wq); + +.. ** + +A thread will wait for the flag to be changed to a value other than zero: + +.. code-block:: c + + wait_event_interruptible(wq, flag != 0); + +.. ** + +While another thread will change the flag value and wake up the waiting threads: + +.. code-block:: c + + flag = 1 ; + wake_up_interruptible (&wq); + +.. ** + +Exercises +========= + +.. include:: exercises-summary.hrst +.. |LAB_NAME| replace:: device_drivers + +0. Intro +-------- + +Using `LXR `_ find the definitions +of the following symbols in the Linux kernel: + + * :c:type:`struct file` + * :c:type:`struct file_operations` + * :c:type:`generic_ro_fops` + * :c:func:`vfs_read` + + +1. Register/unregister +---------------------- + +The driver will control a single device with the ``MY_MAJOR`` major and +``MY_MINOR`` minor (the macros defined in the kernel/so2_cdev.c file). + + 1. Create **/dev/so2_cdev** character device node using **mknod**. + + .. hint:: Read `Majors and minors`_ section in the lab. + + 2. Implement the registration and deregistration of the device with the name + ``so2_cdev``, respectively in the init and exit module functions. Implement **TODO 1**. + + .. hint:: Read the section `Registration and unregistration of character devices`_ + + 3. Display, using ``pr_info``, a message after the registration and unregistration + operations to confirm that they were successful. Then load the module into the kernel: + + .. code-block:: bash + + $ insmod so2_cdev.ko + + And see character devices in ``/proc/devices``: + + .. code-block:: bash + + $ cat /proc/devices | less + + Identify the device type registered with major 42 . Note that ``/proc/devices`` + contains only the device types (major) but not the actual devices (i.e. minors). + + .. note:: Entries in /dev are not created by loading the module. 
These can be created + in two ways: + + * manually, using the ``mknod`` command as we will do in the following exercises. + * automatically using udev daemon + + 4. Unload the kernel module + + .. code-block:: bash + + rmmod so2_cdev + +2. Register an already registered major +--------------------------------------- + +Modify **MY_MAJOR** so that it points to an already used major number. + +.. hint:: See ``/proc/devices`` to get an already assigned major. + +See `errno-base.h `_ +and figure out what does the error code mean. +Return to the initial configuration of the module. + +3. Open and close +----------------- + +Run ``cat /dev/so2_cdev`` to read data from our char device. +Reading does not work because the driver does not have the open function implemented. +Follow comments marked with TODO 2 and implement them. + + 1. Initialize your device + + * add a cdev struct field to ``so2_device_data`` structure. + * Read the section `Registration and unregistration of character devices`_ in the lab. + + 2. Implement the open and release functions in the driver. + 3. Display a message in the open and release functions. + 4. Read again ``/dev/so2_cdev`` file. Follow the messages displayed by the kernel. + We still get an error because ``read`` function is not yet implemented. + +.. note:: The prototype of a device driver's operations is in the ``file_operations`` + structure. Read `Open and release`_ section. + +4. Access restriction +--------------------- + +Restrict access to the device with atomic variables, so that a single process +can open the device at a time. The rest will receive the "device busy" error +(``-EBUSY``). Restricting access will be done in the open function displayed by +the driver. Follow comments marked with **TODO 3** and implement them. + + 1. Add an ``atomic_t`` variable to the device structure. + 2. Initialize the variable at module initialization. + 3. Use the variable in the open function to restrict access to the device. 
We + recommend using :c:func:`atomic_cmpxchg`. + 4. Reset the variable in the release function to retrieve access to the device. + 5. To test your deployment, you'll need to simulate a long-term use of your + device. To simulate a sleep, call the scheduler at the end of the device opening: + +.. code-block:: bash + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1000); + +.. ** + + 6. Test using ``cat /dev/so2_cdev`` & ``cat /dev/so2_cdev``. + + +.. note:: The advantage of the atomic_cmpxchg function is that it can check the + old value of the variable and set it up to a new value, all in one + atomic operation. Read more details about `atomic_cmpxchg `_ + An example of use is `here `_. + +5. Read operation +----------------- + +Implement the read function in the driver. Follow comments marked with ``TODO 4`` and implement them. + + 1. Keep a buffer in ``so2_device_data`` structure initialized with the value of ``MESSAGE`` macro. + Initializing this buffer will be done in module ``init`` function. + 2. At a read call, copy the contents of the kernel space buffer into the user + space buffer. + + * Use the :c:func:`copy_to_user` function to copy information from kernel space to + user space. + * Ignore the size and offset parameters at this time. You can assume that + the buffer in user space is large enough. You do not need to check the + validity of the size argument of the read function. + * The value returned by the read call is the number of bytes transmitted + from the kernel space buffer to the user space buffer. + + 3. After implementation, test using ``cat /dev/so2_cdev``. + +.. note:: The command ``cat /dev/so2_cdev`` does not end (use Ctrl+C). + Read the `read and write`_ sections and `Access to the address space of the process`_ + If you want to display the offset value use a construction of the form: + ``pr_info("Offset: %lld \n", *offset)``; The data type loff_t (used by offset ) is a typedef for long long int. 
+ +The ``cat`` command reads to the end of the file, and the end of the file is +signaled by returning the value 0 in the read. Thus, for a correct implementation, +you will need to update and use the offset received as a parameter in the read +function and return the value 0 when the user has reached the end of the buffer. + +Modify the driver so that the ``cat`` commands ends: + + 1. Use the size parameter. + 2. For every read, update the offset parameter accordingly. + 3. Ensure that the read function returns the number of bytes that were copied + into the user buffer. + +.. note:: By dereferencing the offset parameter it is possible to read and move the current + position in the file. Its value needs to be updated every time a read is done + successfully. + +6. Write operation +------------------ + +Add the ability to write a message into kernel buffer to replace the predefined message. Implement +the write function in the driver. Follow comments marked with ``TODO 5`` + +Ignore the offset parameter at this time. You can assume that the driver buffer is +large enough. You do not need to check the validity of the write function size +argument. + +.. note:: The prototype of a device driver's operations is in the file_operations + structure. + Test using commands: + + .. code-block:: bash + + echo "arpeggio"> /dev/so2_cdev + cat /dev/so2_cdev + + Read the `read and write`_ sections and `Access to the address space of the process`_ + +7. ioctl operation +------------------ + +For this exercise, we want to add the ioctl ``MY_IOCTL_PRINT`` to display the +message from the ``IOCTL_MESSAGE`` macro in the driver. +Follow the comments marked with ``TODO 6`` + +For this: + + 1. Implement the ioctl function in the driver. + 2. We need to use ``user/so2_cdev_test.c`` to call the + ioctl function with the appropriate parameters. + 3. To test, we will use an user-space program (``user/so2_cdev_test.c``) + which will call the ``ioctl`` function with the required arguments. 
+ +.. note:: The macro ``MY_IOCTL_PRINT`` is defined in the file ``include/so2_cdev.h``, + which is shared between the kernel module and the user-space program. + + Read the `ioctl`_ section in the lab. + +.. note:: The userspace code is compiled automatically at ``make build`` and + copied at ``make copy``. + + Because we need to compile the program for the qemu machine, which is 32 bit, + if your host is 64 bit then you need to install the ``gcc-multilib`` package. + +Extra Exercises +=============== + +Ioctl with messaging +-------------------- + +Add two ioctl operations to modify the message associated with the +driver. Use a fixed-length buffer ( BUFFER_SIZE ). + + 1. Add the following operations to the ``ioctl`` function of the driver: + + * ``MY_IOCTL_SET_BUFFER`` for writing a message to the device; + * ``MY_IOCTL_GET_BUFFER`` to read a message from your device. + + 2. For testing, pass the required command line arguments to the + user-space program. + +.. note:: Read the `ioctl`_ and `Access to the address space of the process`_ + sections of the lab. + +Ioctl with waiting queues +------------------------- + +Add two ioctl operations to the device driver for queuing. + + 1. Add the following operations to the ``ioctl`` function of the driver: + + * ``MY_IOCTL_DOWN`` to add the process to a queue; + * ``MY_IOCTL_UP`` to remove the process from a queue. + + 2. Fill the device structure with a ``wait_queue_head_t`` field and a + flag. + 3. Do not forget to initialize the wait queue and flag. + 4. Remove the exclusive access condition from the previous exercise. + 5. For testing, pass the required command line arguments to the + user-space program. + +When the process is added to the queue, it will remain blocked in execution. To +run the queue command, open a new console in the virtual machine with Alt+F2. +You can return to the previous console with Alt+F1. If you're connected via +SSH to the virtual machine, open a new console. + +..
note:: Read the `ioctl`_ and `Waiting queues`_ sections in the lab. + +O_NONBLOCK implementation +------------------------- + +.. note:: If a file is opened with the ``O_NONBLOCK`` flag, then its + operations will be non-blocking. + + In case data is not available when performing a read, the following + happens: + + * if the file has been opened with ``O_NONBLOCK``, the read call + will return ``-EWOULDBLOCK``. + * otherwise, the current task (process) will be placed in a waiting + queue and will be unblocked as soon as data becomes available + (in our case, at write). + +* To allow unblocking the read operation, remove the exclusive access + condition from previous exercises. +* You can use the queue defined for the previous exercise. +* You can ignore the file offset. +* Modify the initial size of data to ``0``, to allow testing. +* For testing, pass the required command line arguments to the + user-space program. + + * when using the ``n`` option, the test program will change the open flags + to ``O_NONBLOCK`` and then perform a ``read``. + +* What are the flags used to open the file when running ``cat /dev/so2_cdev``? + diff --git a/Documentation/teaching/labs/device_model.rst b/Documentation/teaching/labs/device_model.rst new file mode 100644 index 00000000000000..58f971d766de13 --- /dev/null +++ b/Documentation/teaching/labs/device_model.rst @@ -0,0 +1,1286 @@ +================== +Linux Device Model +================== + +Overview +======== + +Plug and Play is a technology that offers support for automatically adding and +removing devices to the system. This reduces conflicts with the resources they +use by automatically configuring them at system startup. In order to achieve +these goals, the following features are required: + + * Automatic detection of adding and removing devices in the system (the device + and its bus must notify the appropriate driver that a configuration change + occurred).
+ * Resource management (addresses, irq lines, DMA channels, memory areas), + including resource allocation to devices and solving conflicts that may arise. + * Devices must allow for software configuration (device resources - ports, + interrupts, DMA resources - must allow for driver assignment). + * The drivers required for new devices must be loaded automatically by the + operating system when needed. + * When the device and its bus allow, the system should be able to add or + remove the device from the system while it is running, without having to reboot + the system (hotplug). + +For a system to support plug and play, the BIOS, operating system and the device +must support this technology. The device must have an ID that will provide to the +driver for identification, and the operating system must be able to identify +these configuration changes as they appear. + +Plug and play devices are: PCI devices (network cards), USB (keyboard, mouse, +printer), etc. + +Prior to version 2.6, the kernel did not have a unified model to get +information about devices. +For this reason, a model for Linux devices, Linux Device Model, was developed. + +The primary purpose of this model is to maintain internal data structures that +reflect the state and structure of the system. Such information includes what +devices are in the system, how they are in terms of power management, what bus +they are attached to, what drivers they have, along with the structure of the +buses, devices, drivers in the system. + +To maintain this information, the kernel uses the following entities: + + * device - a physical device that is attached to a bus + * driver - a software entity that can be associated with a device and performs + operations with it + * bus - a device to which other devices can be attached + * class - a type of device that has a similar behavior; There is a class for + disks, partitions, serial ports, etc. 
+ * subsystem - a view on the structure of the system; Kernel subsystems + include devices (hierarchical view of all devices in the system), buses (bus + view of devices according to how they are attached to buses), classes, etc. + +sysfs +===== + +The kernel provides a representation of its model in userspace through the +sysfs virtual file system. It is usually mounted in the /sys directory and +contains the following subdirectories: + + * block - all block devices available in the system (disks, partitions) + * bus - types of bus to which physical devices are connected (pci, ide, usb) + * class - driver classes that are available in the system (net, sound, usb) + * devices - the hierarchical structure of devices connected to the system + * firmware - information from system firmware (ACPI) + * fs - information about mounted file systems + * kernel - kernel status information (logged-in users, hotplug) + * module - the list of modules currently loaded + * power - information related to the power management subsystem + +As you can see, there is a correlation between the kernel data structures +within the described model and the subdirectories in the sysfs virtual file +system. Although this likeness may lead to confusion between the two concepts, +they are different. The kernel device model can work without the sysfs file +system, but the converse is not true. + +The sysfs information is found in files that contain an attribute. Some +standard attributes (represented by files or directories with the same name) +are as follows: + + * dev - Major and minor device identifier.
It can be used to automatically + create entries in the /dev directory + * device - a symbolic link to the directory containing devices; It can be + used to discover the hardware devices that provide a particular service (for + example, the ethi PCI card) + * driver - a symbolic link to the driver directory (located in + /sys/bus/\*/drivers ) + +Other attributes are available, depending on the bus and driver used. + +.. ditaa:: + +------+ + | /sys | + +--+---+ + | + +----------------------------------------------------+-------------------------------------+-----------------------------------------+ + | | | | + v v v v + +-----+ +-------+ +---------+ +--------+ + | bus | | class | | devices | | module | + +--+--+ +---+---+ +----+----+ +---+----+ + | | | | + | | | +-------------+-----------------+ + | | | | | + v v v v v + +------------------------+ +-----------------------+ +-------------------------+ +----------------------+ +-------------------------+ + | mybus: struct bus_type | | myclass: struct class | | mybus0: struct device | | mybus: struct module | | mydriver: struct module | + +-------------+----------+ +----------+------------+ +-----------+-------------+ +----------------------+ +-------------------------+ + | | | + +--------+--------------+ v v + | | +-------------------------------+ +----------------------+ + v v | myclass0: struct class_device | | mydev: struct device | + +---------+ +---------+ +-------------------------------+ +----------------------+ + | devices | | drivers | + +---------+ +---+-----+ + | + v + +--------------------------------+ + | mydriver: struct device_driver | + +--------------------------------+ + + +Basic Structures in Linux Devices +================================= + +Linux Device Model provides a number of structures to ensure the interaction +between a hardware device and a device driver. The whole model is based on +kobject structure. 
Hierarchies are built using this structure and the following +structures are implemented: + + * struct bus_type + * struct device + * struct device_driver + + +.. ditaa:: + :--no-separation: + + +--+ +--+ +--+ + mydriver.c | | mybus.c | | bus/driver/device core | | kobject core + | | | | | | + | | | | | | + | | | | | | + | | +-----------------------------+ | | +-----------------------------+ | | + | | | my_bus_type +------=>+ struct bus_type | | | + | | +-----------------------------+ | | +-----------------------------+ | | + | | |name | | | |name | | | + | | |uevent() = my_uevent() | | | |uevent() | | | + | | |match() = my_match() | | | |match() | | | + | | +-----------------------------+ | | +-----------------------------+ | | + | | | | | | | | + | | | | +-----------------------------+ | | + | | | | | | + +----------------+ | | +-----------------------------+ | | +-----------------------------+ | | +-------------------+ + | mydriver +------=>+ struct my_driver +------->+ struct device_driver +-------+---->| struct kobject | + +----------------+ | | +-----------------------------+ | | +-----------------------------+ | | | +-------------------+ + | | | | | | | | | name | | | | | k_name | + +----------------+ | | +-----------------------------+ | | +-----------------------------+ | | | +-------------------+ + | | | my_register_driver() | | | | driver_register() | | | | | kobject_add() | + | | | my_unregister_driver() | | | | driver_unregister() | | | | | kobject_delete() | + | | +-----------------------------+ | | +-----------------------------+ | | | +-------------------+ + | | | | | | | + | | | | | | | + +----------------+ | | +-----------------------------+ | | +-----------------------------+ | | | + | mydevice +------=>+ struct my_device +------->+ struct device +-------+ + +----------------+ | | +-----------------------------+ | | +-----------------------------+ | | + | | | | | | | | | bus_id | | | + +----------------+ | | +-----------------------------+ | | 
+-----------------------------+ | | + | | | my_register_device() | | | | device_register() | | | + | | | my_unregister_device() | | | | device_unregister() | | | + | | +-----------------------------+ | | +-----------------------------+ | | + | | | | | | + +--+ +--+ +--+ + + +The kobject structure +--------------------- + +A kobject structure does not perform a single function. This structure is +usually integrated into a larger one. A kobject structure actually +incorporates a set of features that will be offered to a higher abstraction +object in the Linux Device Model hierarchy. + +For example, the cdev structure has the following definition: + +.. code-block:: c + + struct cdev { + struct kobject kob; + struct module *owner; + const struct file_operations *ops; + struct list_head list; + dev_t dev; + unsigned int count; + }; + + +Note that this structure includes a ``kobject`` structure field. + +A kobject structure is defined as follows: + +.. code-block:: c + + struct kobject { + const char *name; + struct list_head entry; + struct kobject *parent; + struct kset *kset; + struct kobj_type *ktype; + struct sysfs_dirent *sd; + struct kref kref; + unsigned int state_initialized:1; + unsigned int state_in_sysfs:1; + unsigned int state_add_uevent_sent:1; + unsigned int state_remove_uevent_sent:1; + unsigned int uevent_suppress:1; + }; + +As we can see, the kobject structures are in a hierarchy: an object has a +parent and holds a kset member, which contains objects on the same level. + +Working with the structure involves initializing it with the +:c:func:`kobject_init` function. +Also in the initialization process it is necessary to set the name of the +``kobject`` structure, which will appear in sysfs, using the +:c:func:`kobject_set_name` function. + +Any operation on a kobject is done by incrementing its internal counter using +:c:func:`kobject_get`, or decrementing if it is no longer used using +:c:func:`kobject_put`. 
+Thus, a kobject object will only be released when its internal counter reaches 0. +A method of notifying this is needed so that the resources associated with the +device structure which included the kobject structure are released +(for example, cdev). +The method is called ``release`` and is associated with the object via the ktype +field (:c:type:`struct kobj_type`). + +The kobject structure is the basic structure of the Linux Device Model. +The structures in the higher levels of the model are :c:type:`struct bus_type`, +:c:type:`struct device` and :c:type:`struct device_driver`. + +Buses +----- + +A bus is a communication channel between the processor and an input/output +device. To ensure that the model is generic, all input/output devices are +connected to the processor via such a bus (even if it can be a virtual one +without a physical hardware correspondent). + +When adding a system bus, it will appear in the sysfs file system in +``/sys/bus``. +As with kobjects, buses can be organized into hierarchies and will be represented +in sysfs. + +In the Linux Device Model, a bus is represented by the structure +:c:type:`struct bus_type`: + +.. code-block:: c + + struct bus_type { + const char *name; + const char *dev_name; + struct device *dev_root; + struct bus_attribute *bus_attrs; + struct device_attribute *dev_attrs; + struct driver_attribute *drv_attrs; + struct subsys_private *p; + + int (*match)(struct device *dev, struct device_driver *drv); + int (*uevent)(struct device *dev, struct kobj_uevent_env *env); + int (*probe)(struct device *dev); + int (*remove)(struct device *dev); + //... + }; + +It can be noticed that a bus has a name, lists of default attributes, a number +of specific functions, and the driver's private data. +The ``uevent`` function (formerly ``hotplug``) is used with hotplug devices. 
+ +Bus operations are the registration, the implementation of the operations +described in the :c:type:`struct bus_type` structure and the iteration and +inspection of the devices connected to the bus. + +A bus is registered using :c:func:`bus_register`, and unregistered using +:c:func:`bus_unregister`. + +Implementation example: + +.. code-block:: c + + #include <linux/device.h> + /* mybus.c */ + + //bus type + struct bus_type my_bus_type = { + .name = "mybus", + .match = my_match, + .uevent = my_uevent, + }; + + static int __init my_bus_init(void) + { + int err; + + //... + err = bus_register(&my_bus_type); + if (err) + return err; + //... + } + + static void __exit my_bus_exit(void) + { + //... + bus_unregister(&my_bus_type); + //... + } + + +The functions that will normally be initialized within a bus_type structure are +``match`` and ``uevent``: + +.. code-block:: c + + #include <linux/device.h> + #include <linux/string.h> + /* mybus.c */ + + // match devices to drivers; just do a simple name test + static int my_match(struct device *dev, struct device_driver *driver) + { + return !strncmp(dev_name(dev), driver->name, strlen(driver->name)); + } + + // respond to hotplug user events; add environment variable DEV_NAME + static int my_uevent(struct device *dev, struct kobj_uevent_env *env) + { + add_uevent_var(env, "DEV_NAME=%s", dev_name(dev)); + return 0; + } + +The ``match`` function is used when a new device or a new driver is added to the +bus. Its role is to make a comparison between the device ID and the driver ID; +it must return a nonzero value when the driver can handle the device and 0 otherwise. +The ``uevent`` function is called before generating a hotplug event in user-space and +has the role of adding environment variables. + +Other possible operations on a bus are iterating over the drivers or devices +attached to it. +Although we cannot directly access them (lists of drivers and devices +being stored in the private data of the bus, the ``subsys_private *p`` field), +these can be iterated using the :c:func:`bus_for_each_dev` and +:c:func:`bus_for_each_drv` functions.
+ +The Linux Device Model interface allows you to create attributes for the +associated objects. These attributes will have a corresponding file in the +bus subdirectory in sysfs. The attributes associated with a bus are +described by the bus_attribute structure : + +.. code-block:: c + + struct bus_attribute { + struct attribute attr; + ssize_t (*show)(struct bus_type *, char *buf); + ssize_t (*store)(struct bus_type *, const char *buf, size_t count); + }; + +Typically, an attribute is defined by the :c:macro:`BUS_ATTR` macro. +The :c:func:`bus_create_file` and :c:func:`bus_remove_file` functions can be +used to add/delete an attribute within the bus structure. + +An example of defining an attribute for ``my_bus`` is shown below: + +.. code-block:: c + + /* mybus.c */ + + #define MY_BUS_DESCR "SO2 rules forever" + + // export a simple bus attribute + static ssize_t my_show_bus_descr(struct bus_type *bus, char *buf) + { + return snprintf(buf, PAGE_SIZE, "%s\n", MY_BUS_DESCR); + } + + /* + * define attribute - attribute name is descr; + * full name is bus_attr_descr; + * sysfs entry should be /sys/bus/mybus/descr + */ + BUS_ATTR(descr, 0444, my_show_bus_descr, NULL); + + // specify attribute - in module init function + static int __init my_bus_init(void) + { + int err; + //... + err = bus_create_file(&my_bus_type, &bus_attr_descr); + if (err) { + /* handle error */ + } + //... + } + + static void __exit my_bus_exit(void) + { + //... + bus_remove_file(&my_bus_type, &bus_attr_descr); + //... + } + +The bus is represented by both a ``bus_type`` object and a ``device`` object, +as we will see later (the bus is also a device). + + +Devices +------- + +Any device in the system has a :c:type:`struct device` structure associated +with it. +Devices are discovered by different kernel methods (hotplug, device drivers, +system initialization) and are registered in the system. Each device present in +the kernel has an entry in ``/sys/devices``. 
+ +At the lowest level, a device in Linux Device Model is represented by a +:c:type:`struct device` structure: + +.. code-block:: c + + struct device { + //... + struct device *parent; + struct device_private *p; + struct kobject kobj; + + const char *init_name; /* initial name of the device */ + //... + struct bus_type *bus; /* type of bus device is on */ + struct device_driver *driver; /* which driver has allocated this + device */ + //... + void (*release)(struct device *dev); + }; + +Structure fields include the parent device that is usually a controller, the +associated ``kobject``, the bus it is connected to, the device driver, and a +function called when the device counter reaches 0 (``release``). + +As usual, we have the registration/unregistration functions +:c:func:`device_register` and :c:func:`device_unregister`. + +To work with attributes, we have structure :c:type:`struct device_attribute`, +the macro :c:macro:`DEVICE_ATTR` for definition, and the functions +:c:func:`device_create_file` and :c:func:`device_remove_file` for adding/removing +the attribute to/from the device. + +One important thing to note is that the :c:type:`struct device` structure is +usually not used directly, but it is added to another structure. For example: + +.. code-block:: c + + // my device type + struct my_device { + char *name; + struct my_driver *driver; + struct device dev; + }; + +Typically, a bus driver will export functions to add or remove such a +device, as shown below: + +.. 
code-block:: c + + /* mybus.c */ + + /* BUS DEVICE (parent) */ + + // parent device release + static void my_bus_device_release(struct device *dev) + { + } + + // parent device + static struct device my_bus_device = { + .init_name = "mybus0", + .release = my_bus_device_release + }; + + /* DEVICE */ + + /* + * as we are not using the reference count, we use a no-op + * release function + */ + static void my_dev_release(struct device *dev) + { + } + + int my_register_device(struct my_device *mydev) + { + mydev->dev.bus = &my_bus_type; + mydev->dev.parent = &my_bus_device; + mydev->dev.release = my_dev_release; + dev_set_name(&mydev->dev, mydev->name); + + return device_register(&mydev->dev); + } + + void my_unregister_device(struct my_device *mydev) + { + device_unregister(&mydev->dev); + } + + /* export register/unregister device functions */ + EXPORT_SYMBOL(my_register_device); + EXPORT_SYMBOL(my_unregister_device); + +As seen, the functions ``my_register_device`` and ``my_unregister_device``, used +to add/remove a device to/from a bus, are defined in the same file where the +bus is defined. Device structures are not initialized; they will be initialized +when the devices are discovered by the system (by hotplug or direct registration +from driver) and the function ``my_register_device`` will be called to add a +device to the bus. + +To use the bus defined above in the driver implementation, we must define a +structure of type ``my_device``, initialize it and register it using the function +exported by the bus (``my_register_device``). + +.. code-block:: c + + /* mydriver.c */ + + static struct my_device mydev; + char devname[NAME_SIZE]; + //... + + //register + int err; + + sprintf(devname, "mydev0"); + mydev.name = devname; + mydev.driver = &mydriver; + dev_set_drvdata(&mydev.dev, &mydev); + err = my_register_device(&mydev); + if (err < 0) { + /*handle error */ + } + + //.. 
+ + //unregister + my_unregister_device(&mydev); + +Drivers +------- + +Linux Device Model is used to allow simple association between system +devices and drivers. Drivers can export information independent of the physical +device. + +In sysfs, driver information has no single subdirectory associated; it can be +found in the directory structure in different places: the loaded module is in +``/sys/module``, in ``devices`` you can find the driver associated with +each device, in ``class`` the drivers belonging to a class, in ``/sys/bus`` +the drivers associated to each bus. + +A device driver is identified by the structure :c:type:`struct device_driver`: + +.. code-block:: c + + struct device_driver { + const char *name; + struct bus_type *bus; + + struct driver_private *p; + + struct module *owner; + const char *mod_name; /* used for built-in modules */ + + int (*probe) (struct device *dev); + int (*remove) (struct device *dev); + void (*shutdown) (struct device *dev); + int (*suspend) (struct device *dev, pm_message_t state); + int (*resume) (struct device *dev); + }; + +Among the structure fields we find the name of the driver (appears in ``sysfs``), +the bus with which the driver works, and functions called at various times in a +device's operation. + +As before, we have the functions :c:func:`driver_register` and +:c:func:`driver_unregister` to register/unregister a driver. + +To work with attributes, we have the :c:type:`struct driver_attribute` structure, +the macro :c:macro:`DRIVER_ATTR` for definition, and the functions +:c:func:`driver_create_file` and :c:func:`driver_remove_file` for +adding/removing the attribute to/from the driver. + +As with devices, the structure :c:type:`struct device_driver` is usually +incorporated into another structure specific to a particular bus (PCI, USB, etc.): + +..
code-block:: c + + /* mybus.c */ + + // my driver type + struct my_driver { + struct module *module; + struct device_driver driver; + }; + + #define to_my_driver(drv) container_of(drv, struct my_driver, driver) + + int my_register_driver(struct my_driver *driver) + { + int err; + + driver->driver.bus = &my_bus_type; + err= driver_register(&driver->driver); + if (err) + return err; + return 0; + } + + void my_unregister_driver(struct my_driver *driver) + { + driver_unregister(&driver->driver); + } + + /* export register/unregister driver functions */ + EXPORT_SYMBOL(my_register_driver); + EXPORT_SYMBOL(my_unregister_driver); + +Driver registration/unregistration operations are exported for use in +other modules. + +As for devices, the operations for drivers are defined when the bus is +initialized and they are exported to be used by drivers. When implementing a +driver that works with devices attached to the bus, we will call the functions +``my_register_driver`` and ``my_unregister_driver`` to associate with the bus. + +To use the functions (in the driver implementation), we must declare a structure +of type ``my_driver``, initialize it and register using the function exported +by the bus. + +.. code-block:: c + + /* mydriver.c */ + + static struct my_driver mydriver = { + .module = THIS_MODULE, + .driver = { + .name = "mydriver", + }, + }; + //... + + //register + int err; + err = my_register_driver(&mydriver); + if (err < 0) { + /*handle error */ + } + //.. + + //unregister + my_unregister_driver(&mydriver); + + +Classes +------- + +A class is a high-level view of the Linux Device Model, which abstracts +implementation details. For example, there are separate drivers for SCSI and ATA +disks, but all belong to the class of disks. Classes provide a grouping of +devices based on functionality, not how they are connected or how they work. +Classes have a correspondent in ``/sys/class``.
+ +There are two main structures that describe the classes: :c:type:`struct class` +and :c:type:`struct device`. +The class structure describes a generic class, while the structure +:c:type:`struct device` describes a class associated with a device. +There are functions for initializing/deinitializing and adding attributes for each +of these, described in ``include/linux/device.h``. + +The advantage of using classes is that the ``udev`` program in userspace, which we +will discuss later, allows the automatic creation of devices in the ``/dev`` +directory based on class information. + +For this reason, we will continue to present a small set of functions that work +with classes to simplify the use of the plug and play mechanism. + +A generic class is described by the :c:type:`struct class` structure: + +.. code-block:: c + + struct class { + const char *name; + struct module *owner; + struct kobject *dev_kobj; + + struct subsys_private *p; + + struct class_attribute *class_attrs; + struct class_device_attribute *class_dev_attrs; + struct device_attribute *dev_attrs; + + int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env); + void (*class_release)(struct class *class); + void (*dev_release)(struct device *dev); + //... + }; + +The :c:func:`class_register` and :c:func:`class_unregister` functions can be +used for initialization/deinitialization. + +.. code-block:: c + + static struct class my_class = { + .name = "myclass", + }; + + static int __init my_init(void) + { + int err; + //... + err = class_register(&my_class); + if (err < 0) { + /* handle error */ + } + //... + } + + static void __exit my_cleanup(void) + { + //... + class_unregister(&my_class); + //... + } + +A class associated with a device is described by the :c:type:`struct device` +structure. +The :c:func:`device_create` and :c:func:`device_destroy` functions can be used +for initialization/deinitialization.
+The :c:func:`device_create` function initializes the ``device`` structure, +and assigns the generic ``class`` structure and the device received as a +parameter to it; +In addition, it will create an attribute of the class, ``dev``, which contains +the major and minor of the device (``major:minor``). +Thus, the udev utility in user space can read the necessary data from this attribute +file to create a node in the ``/dev`` directory by calling ``mknod``. + +An example of initialization: + +.. code-block:: c + + struct device* my_classdev; + struct cdev cdev; + struct device dev; + + //init class for device cdev.dev + my_classdev = device_create(&my_class, NULL, cdev.dev, &dev, "myclass0"); + + //destroy class for device cdev.dev + device_destroy(&my_class, cdev.dev); + +When a new device is discovered, a class will be assigned to it and +a node will be created in the ``/dev`` directory. +For the example above, the node ``/dev/myclass0`` will be generated. + +Hotplug +------- + +``Hotplug`` describes the mechanism for adding or removing a device from the +system while it is running without having to reboot the system. + +A hotplug event is a notification from the kernel to the user-space when something +changes in the system configuration. These events are generated when creating +or removing a kobject from the kernel. Since these objects are the basis of the +Linux Device Model, being included in all structures (``struct bus_type``, +``struct device``, ``struct device_driver``, ``struct class``, etc.), a hotplug event +will be generated when any of these structures is created or removed (``uevent``). + +When a device is discovered in the system, an event is generated. Depending on +the point where it resides in Linux Device Model, the functions corresponding +to the event will be called (usually, the ``uevent`` function associated to the +bus or the class). Using these functions, the driver has the ability to set +system variables for the user-space.
+The generated event then reaches the user-space, where the ``udev`` +utility captures these events. There are configuration files for this +utility in the ``/etc/udev/`` directory. Different rules can be specified to +capture only certain events and perform certain actions, depending on the +system variables set in the kernel or in ``uevent`` functions. + +An important consequence is that in this way the plug and play mechanism can be +achieved; with the help of ``udev`` and the classes (described above), entries +in the ``/dev/`` directories can be automatically created for devices, and using +``udev`` drivers can be automatically loaded for a device. + +Rules for ``udev`` are located in ``/etc/udev/rules.d``. +Any file that ends with ``.rules`` in this directory will be parsed when an event +occurs. For more details on how to write rules in these files see +`Writing udev rules <http://www.reactivated.net/writing_udev_rules.html>`_. +For testing, there are utilities such as ``udevmonitor``, ``udevinfo`` and +``udevtest``. + +For a quick example, consider the situation where we want to automatically load +a driver for a device when an event occurs. We can create a new file, +``/etc/udev/rules.d/myrules.rules``, containing the following line: + +.. code-block:: bash + + SUBSYSTEM=="pnp", ATTRS{id}=="PNP0400", RUN+="/sbin/insmod /root/mydriver.ko" + +This will choose from the events generated only those belonging to the ``pnp`` +subsystem (connected to ``PNP`` bus) and having an id attribute with the value +``PNP0400``. + +When an event matches this rule, the command specified under ``RUN`` will be +executed to insert the appropriate driver in the kernel. + + +Plug and Play +============= + +As noted above, in Linux Device Model all devices are connected by a bus, whether +the bus corresponds to physical hardware or is virtual. + +The kernel already has implemented most buses using a ``bus_type`` structure +and functions to register/unregister drivers and devices.
+To implement a driver, we must first determine the bus to which the supported +devices are connected and use the structures and functions exported by this bus. +The main buses are ``PCI``, ``USB``, ``PNP``, ``IDE``, ``SCSI``, ``platform``, +``ACPI``, etc. + +PNP bus +------- + +The plug and play mechanism provides a means of detecting and setting the resources +for legacy or otherwise unconfigurable devices. All plug and play +drivers, protocols and services meet at the Plug and Play layer, which is responsible +for the exchange of information between drivers and protocols. The following +protocols are available: + + * ``PNPBIOS`` - used for system devices such as serial and parallel ports + * ``ISAPNP`` - offers support for the ISA bus + * ``ACPI`` - offering, among other things, information about system-level devices + +The kernel contains a bus, called ``pnp_bus``, to which many drivers +connect. +The implementation of the bus and the way of working with it follow the Linux Device Model +and are very similar to what we discussed above. + +The main functions and structures exported by the bus, which can be used by +drivers, are: + + * :c:type:`struct pnp_driver` - driver type associated to the bus + * :c:func:`pnp_register_driver` - function used to register a PNP driver in the system + * :c:func:`pnp_unregister_driver` - function used to unregister a PNP driver from the system + +As noted in previous sections, the bus has a function called ``match`` used to +associate the devices with the appropriate drivers. +For example, when a new device is discovered, the bus searches for a driver that meets the condition +given by the ``match`` function with respect to the new device. Usually, this +condition is a comparison of IDs (driver id and device id). +A common approach is using a static table in each driver, which holds information +about the devices supported by the driver, which will be used by the bus +when verifying the condition.
For example, for a parallel port device we have +the table ``parport_pc_pnp_tbl``: + +.. code-block:: c + + static const struct pnp_device_id parport_pc_pnp_tbl[] = { + /* Standard LPT Printer Port */ + {.id = "PNP0400", .driver_data = 0}, + /* ECP Printer Port */ + {.id = "PNP0401", .driver_data = 0}, + }; + + MODULE_DEVICE_TABLE(pnp, parport_pc_pnp_tbl); + +Each driver declares and initializes a structure ``pnp_driver``, such as +``parport_pc_pnp_driver``: + +.. code-block:: c + + static int parport_pc_pnp_probe(struct pnp_dev *dev, const struct pnp_id *card_id, + const struct pnp_id *dev_id); + static void parport_pc_pnp_remove(struct pnp_dev* dev); + + static struct pnp_driver parport_pc_pnp_driver = { + .name = "parport_pc", + .id_table = parport_pc_pnp_tbl, + .probe = parport_pc_pnp_probe, + .remove = parport_pc_pnp_remove, + }; + +We can notice that the structure has as fields a pointer to the table declared +above and two functions, which are called when a new device is detected and when +it is removed from the system. +Like all the structures presented above, the driver must be registered to the +system: + +.. code-block:: c + + static int __init parport_pc_init(void) + { + err = pnp_register_driver(&parport_pc_pnp_driver); + if (err < 0) { + /* handle error */ + } + } + + static void __exit parport_pc_exit(void) + { + pnp_unregister_driver(&parport_pc_pnp_driver); + } + +PNP operations +-------------- + +So far we have discussed the Linux Device Model and its API. To +implement a plug and play driver, we must respect the Linux Device Model. + +Most often, adding a bus in the kernel is not necessary, as most of the existing +buses are already implemented (PCI, USB, etc.). Thus, we must first identify the +bus to which the device is attached. +In the examples below, we will consider that this bus is the ``PNP`` bus and we will +use the structures and functions described above. + +..
ditaa:: + + + Kernel space | User space + | + | + +-------------+ +-------------+ +---------------+ | +--------+ + | | | | | | | | | + | my_device | | my_driver | | my_bus_type | | | udev | + | | | | | | | | | + +-----+-------+ +------+------+ +-------+-------+ | +---+----+ + | | | | | + : : : | : + | | 1.my_register_driver() | 2.call_usermodehelper() | + | +-+------------------------->+-+------------------------->+-+ + | | | | | | | | + | | | | | | | | + | | | | | | | | + | 3.my_uevent() | | | | 4.call_usermodehelper() | | + +++-------------------------| |--------------------------> +------------------------->| | + | | | | | | | | | + | | | | 6.my_probe() | | 5.my_match() | | + | | | |<=------------------------| |<=------------------------| | + | | | | | | | | | + | | | | | | | | | + | | | | | | | | | + | | 7.my_remove() | | 8.my_uevent() | | 9.call_usermodehelper() | | +---------------------------+ + +-+------------------------>| |------------------------->| |------------------------->| | | | + | | | | | | | | | 1 - 2 -> add driver | + | | | | | | | | | 3 - 6 -> add device | + | | | | | | | | | 7 - 9 -> remove device | + | | | 10.my_unregister_driver()| | 11.call_usermodehelper() | | | 10 - 11 -> remove driver | + | +-+------------------------->+-+------------------------->+-+ | | + | | | | | +---------------------------+ + : : : | : + + +Adding a driver +--------------- + +In addition to the usual operations, a driver must follow the Linux Device Model. +Thus, it will be registered in the system using the functions provided by +the bus for this purpose. +Usually, the bus provides a particular driver structure containing a +:c:type:`struct device_driver` structure, that the driver must initialize and +register using a function ``*_register_driver``. +For example, for the ``PNP`` bus, the driver must declare and initialize a +structure of type :c:type:`struct pnp_driver` and register it using +``pnp_register_driver``: + +..
code-block:: c + + static struct pnp_driver my_pnp_driver = { + .name = "mydriver", + .id_table = my_pnp_tbl, + .probe = my_pnp_probe, + .remove = my_pnp_remove, + }; + + static int __init my_init(void) + { + err = pnp_register_driver(&my_pnp_driver); + } + +Unlike legacy drivers, plug and play drivers don't register devices at +initialization in the init function (``my_init`` in the example above) using +:c:func:`register_device`. + +As described above, each bus has a ``match`` function which is called when a new +device is detected in the system to determine the associated driver. +Thus, there must be a way for each driver to export information about the +devices it supports, to allow this check to pass and have its functions further +called. +In the examples presented in this lab, the match function does a simple +comparison between the device name and the driver name. Most drivers use a table +containing information about the supported devices and store a pointer to this table in the +driver structure. +For example, a driver associated to a ``PNP`` bus defines a table of type +:c:type:`struct pnp_device_id` and initializes the field ``id_table`` from the +structure ``pnp_driver my_pnp_driver`` with a pointer to it: + +.. code-block:: c + + static const struct pnp_device_id my_pnp_tbl[] = { + /* Standard LPT Printer Port */ + {.id = "PNP0400", .driver_data = 0}, + /* ECP Printer Port */ + {.id = "PNP0401", .driver_data = 0}, + { } + }; + + MODULE_DEVICE_TABLE(pnp,my_pnp_tbl); + + static struct pnp_driver my_pnp_driver = { + //... + .id_table = my_pnp_tbl, + //... + }; + +In the example above, the driver supports multiple parallel port devices, +defined in the table ``my_pnp_tbl``. This information is used by the bus in +the ``match_device`` function. +When adding a driver, the bus driver will be associated to it and new entries +in ``sysfs`` will be created based on the driver name.
+Then the bus ``match`` function will be called for every supported device, +to associate the driver with any connected device that it supports. + +Removing a driver +----------------- + +To remove a driver from the kernel, in addition to operations required for a +legacy driver, we must unregister the ``device_driver`` structure. +For a driver associated with the ``PNP`` bus, we must unregister the ``pnp_driver`` +structure using the :c:func:`pnp_unregister_driver` function: + +.. code-block:: c + + static struct pnp_driver my_pnp_driver; + + static void __exit my_exit(void) + { + pnp_unregister_driver(&my_pnp_driver); + } + +Unlike legacy drivers, plug and play drivers don't unregister devices in the +module unload function (``my_exit``). When a driver is removed, all the +references to it will be removed for all the devices it supports, and entries +from ``sysfs`` will also be removed. + +Adding a new device +------------------- + +As we saw above, plug and play drivers do not register devices at initialization. +This operation will take place in the ``probe`` function, which is called when +a new device is detected. A device attached to the ``PNP`` bus will be added to +the system by the function ``probe`` from the ``pnp_driver`` structure: + +.. code-block:: c + + static int my_pnp_probe(struct pnp_dev *dev, const struct pnp_id *card_id, + const struct pnp_id *dev_id) { + int err, iobase, nr_ports, irq; + + //get irq & ports + if (pnp_irq_valid(dev, 0)) + irq = pnp_irq(dev, 0); + if (pnp_port_valid(dev, 0)) { + iobase = pnp_port_start(dev, 0); + } else + return -ENODEV; + nr_ports = pnp_port_len(dev, 0); + + /* register device dev */ + } + + static struct pnp_driver my_pnp_driver = { + //... + .probe = my_pnp_probe, + //... + }; + +Upon detection of a device in the kernel (at boot or by the insertion of the +device through ``hotplug``), an interrupt is generated and reaches the bus +driver. 
+The device is registered using the function :c:func:`device_register` and it is +attached to the bus. A call to the user space will also be generated, and the +event can be treated by ``udev``. Then, the list of drivers associated with the +bus is iterated and the ``match`` function is called for each of them. +The ``match`` function tries to find a driver for the new device. After a +suitable driver is found for the device, the ``probe`` function of the driver +is called. If the function ends successfully, the device is added to the driver's +list of devices and new entries are created in ``sysfs`` based on the device name. + +Removing a device +----------------- + +As we saw above, the plug and play drivers don't unregister devices when the +driver is unloaded. This operation is done in the ``remove`` function, which +is called when a device is removed from the system. +In case of a device attached to the ``PNP`` bus, the unregister will be done +in the ``remove`` function specified in the ``pnp_driver`` structure: + +.. code-block:: c + + static void my_pnp_remove(struct pnp_dev *dev) { + /* unregister device dev */ + } + + static struct pnp_driver my_pnp_driver = { + //... + .remove = my_pnp_remove, + }; + +As seen in the example above, when the removal of a device is detected, the +``my_pnp_remove`` function is called. A user-space call is also generated, which +can be detected by ``udev``, and entries are removed from ``sysfs``. + +Exercises +========= + +.. include:: exercises-summary.hrst +.. |LAB_NAME| replace:: device_model + +0. Intro +--------- + +Find the definitions of the following symbols in the Linux kernel: + + * functions ``dev_name``, ``dev_set_name``. + * functions ``pnp_device_probe``, ``pnp_bus_match``, ``pnp_register_driver`` + and the ``pnp_bus_type`` variable. + +1. Bus implementation +--------------------- + +Analyze the contents of the ``bex.c``, a module that implements a bus +driver. 
Follow the comments marked with **TODO 1** and implement the missing +functionality: register the bus driver and add a new device named ``root`` +with type ``none`` and version 1. + +.. hint:: See :c:func:`bex_add_dev`. + +.. hint:: The register and unregister must be done using :c:func:`bus_register` + and :c:func:`bus_unregister`. + +Load the module and verify that the bus is visible in ``/sys/bus``. Verify +that the device is visible in ``/sys/bus/bex/devices``. + +Remove the module and notice that the ``sysfs`` entries are removed. + +2. Add type and version device attributes +----------------------------------------- + +Add two read-only device attributes, ``type`` and ``version``. Follow the +**TODO 2** markings. + +.. hint:: You will need to add the two attributes in the structure + ``bex_dev_attrs``, as follows: + + ``&dev_attr_<name>.attr,`` + +.. hint:: + + A possible implementation for the show function is the following: + + .. code-block:: c + + static ssize_t + type_show(struct device *dev, struct device_attribute *attr, char *buf) + { + struct bex_device *bex_dev = to_bex_device(dev); + + return sprintf(buf, "%s\n", bex_dev->type); + } + DEVICE_ATTR_RO(type); + +Observe that two new attributes are visible in +``/sys/bus/bex/devices/root``. Check the contents of these attributes. + +3. Add del and add bus attributes +--------------------------------- + +Add two write-only bus attributes, ``del`` and ``add``. ``del`` expects the name +of a device to delete, while ``add`` expects the name, type and version to +create a new device. Follow the **TODO 3** markings and review +`Buses`_. + +.. hint:: Use :c:func:`sscanf` to parse the input from sysfs and + :c:func:`bex_del_dev` and :c:func:`bex_add_dev` to delete + and create a new device. + +An example for the store function is the following: + +..
code-block:: c + + static ssize_t add_store(struct bus_type *bt, const char *buf, size_t count) + { + char name[32]; + int ret; + + ret = sscanf(buf, "%31s", name); + if (ret != 1) + return -EINVAL; + + ... + } + BUS_ATTR(add, S_IWUSR, NULL, add_store); + +.. hint:: The store functions should return ``0`` if + ``bex_add_dev``/``bex_del_dev`` fail and ``count`` otherwise. + +Create a new device and observe that it is visible in +``/sys/bus/bex/devices``. Delete it and observe it disappears from ``sysfs``. + +.. hint:: Use echo to write into the bus attributes: + + .. code-block:: shell + + $ echo "name type 1" > /sys/bus/bex/add + $ echo "name" > /sys/bus/bex/del + +4. Register the bex misc driver +------------------------------- + +Modify **bex_misc.c** so that it registers the driver with the bex +bus. Insert the ``bex_misc.ko`` module and create a new bex device from +sysfs with the name "test", type "misc", version 2. Follow the **TODO +4** markings. + +Observe that the driver is visible in ``/sys/bus/bex/drivers``. + +Why isn't the probe function called? + +.. hint:: Notice that the bus match function in **bex.c** is not + implemented. + +Implement the bus matching function in **bex.c**. Follow the **TODO 5** +markings. Try again to create a new bex device and observe that this +time the ``probe`` function from the ``bex_misc`` driver is called. + +5. Register misc device in the bex_misc probe function +------------------------------------------------------ + +Modify **bex_misc.c** to refuse probing if ``version > 1``. Also, register the +defined misc device in ``bex_misc_probe`` and deregister it in +``bex_misc_remove``. Follow the **TODO 6** markings. + +.. hint:: Use :c:func:`misc_register` and :c:func:`misc_deregister`. + +Create a new device with the name "test", type "misc" and version 2 +and observe that the probe fails. Create a new device with the name +"test", type "misc" and version 1 and observe that the probe is +successful.
+ +Inspect ``/sys/bus/bex/devices/test`` and observe that we have a new +entry. Identify the major and minor for the misc device, create a +character device file and try to read and write from the misc device +buffer. + +.. hint:: The major and minor should be visible in the dev attribute + of the misc device + +6. Monitor uevent notifications +------------------------------- + +Use the ``udevadm monitor`` command and observe what happens when: + +* the ``bex.ko`` and ``bex_misc.ko`` modules are inserted + +* a new device with the type "test" is created + +* a new device with the type "misc" and version 2 is created + +* a new device with the type "misc" and version 1 is created + +* all of the above are removed diff --git a/Documentation/teaching/labs/exercises-summary.hrst b/Documentation/teaching/labs/exercises-summary.hrst new file mode 100644 index 00000000000000..b9f84fce5d6b9a --- /dev/null +++ b/Documentation/teaching/labs/exercises-summary.hrst @@ -0,0 +1,56 @@ +.. important:: + + To solve exercises, you need to perform these steps: + + * prepare skeletons from templates + * build modules + * copy modules to the VM + * start the VM and test the module in the VM. + + The current lab name is |LAB_NAME|. See the exercises for the task name. + + The skeleton code is generated from full source examples located in + :file:`tools/labs/templates`. To solve the tasks, start by generating + the skeleton code for a complete lab: + + .. code-block:: shell + + tools/labs $ make clean + tools/labs $ LABS=<lab name> make skels + + You can also generate the skeleton for a single task, using + + .. code-block:: shell + + tools/labs $ LABS=<lab name>/<task name> make skels + + Once the skeleton drivers are generated, build the source: + + .. code-block:: shell + + tools/labs $ make build + + Then, copy the modules and start the VM: + + .. code-block:: shell + + tools/labs $ make copy + tools/labs $ make boot + + The modules are placed in /home/root/skels/|LAB_NAME|/.
+ + Alternatively, we can copy files via :command:`scp`, in order to avoid restarting the VM. + For additional details about connecting to the VM via the network, please check :ref:`vm_interaction_link`. + + Review the `Exercises`_ section for more detailed information. + +.. warning:: + + Before starting the exercises or generating the skeletons, please run **git pull** inside the Linux repo, + to make sure you have the latest version of the exercises. + + If you have local changes, the pull command will fail. Check for local changes using ``git status``. + If you want to keep them, run ``git stash`` before ``pull`` and ``git stash pop`` after. + To discard the changes, run ``git reset --hard master``. + + If you already generated the skeleton before ``git pull`` you will need to generate it again. \ No newline at end of file diff --git a/Documentation/teaching/labs/filesystems_part1.rst b/Documentation/teaching/labs/filesystems_part1.rst new file mode 100644 index 00000000000000..cea16514c73931 --- /dev/null +++ b/Documentation/teaching/labs/filesystems_part1.rst @@ -0,0 +1,794 @@ +============================ +File system drivers (Part 1) +============================ + +Lab objectives +============== + + * acquiring knowledge about the Virtual Filesystem (VFS) in Linux and understanding concepts regarding 'inode', 'dentry', 'file', superblock and data block. + * understanding the process of mounting a file system inside VFS. + * knowledge regarding various file system types and understanding differences between file systems with physical support (on disk) and the ones without physical support. + +Virtual Filesystem (VFS) +======================== + +The Virtual Filesystem (also known as VFS) is a component of the kernel that handles all system calls related to files and file systems. +VFS is a generic interface between the user and a particular file system. 
+This abstraction simplifies the implementation of file systems and provides an easier integration of multiple file systems. This way, the implementation of a file system is accomplished by using the API provided by the VFS, and the generic hardware and I/O subsystem communication parts are handled by VFS. + +From a functional point of view, file systems can be grouped into: + + * disk file systems (ext3, ext4, xfs, fat, ntfs, etc.) + * network file systems (nfs, smbfs/cifs, ncp, etc.) + * virtual filesystems (procfs, sysfs, sockfs, pipefs, etc.) + +A Linux kernel instance will use VFS for the hierarchy (a tree) of directories and files. +A new file system will be added as a VFS subtree using the mount operation. +A file system is usually mounted from the environment for which it was built (from a block type device, from network, etc.). +In particular, however, the VFS can use a normal file as a virtual block device, so it is possible to mount disk file systems over normal files. This way, stacks of file systems can be created. + +The basic idea of VFS is to provide a single file model that can represent files from any file system. +The file system driver is responsible for bringing its own representation to this common denominator. +This way the kernel can create a single directory structure that contains the entire system. +There will be a file system that will be the root, the rest being mounted in its various directories. + +The general file system model +============================= + +The general file system model, to which any implemented file system needs to be reduced, consists of several well-defined entities: :c:type:`superblock`, :c:type:`inode`, :c:type:`file`, and :c:type:`dentry`. +These entities are file system metadata (they contain information about data or other metadata). + +Model entities interact using some VFS or kernel subsystems: dentry cache, inode cache, buffer cache.
+Each entity is treated as an object: it has an associated data structure and a pointer to a table of methods. Particular behavior is given to each component by replacing the associated methods. + +superblock +---------- + +The superblock stores the information needed for a mounted file system: + + * inode and blocks locations + * file system block size + * maximum filename length + * maximum file size + * the location of the root inode + +Localization: +~~~~~~~~~~~~~ + + * In the case of disk file systems, the superblock has a correspondent in the first block of the disk (Filesystem Control Block). + * In VFS, all superblocks of filesystems are retained in a list of structures of type :c:type:`struct super_block` and the methods in structures of type :c:type:`struct super_operations`. + +inode +----- + +The inode (index node) keeps information about a file in the general sense (abstraction): regular file, directory, special file (pipe, fifo), block device, character device, link, or anything that can be abstracted as a file. + +An inode stores information like: + + * file type; + * file size; + * access rights; + * access or modify time; + * location of data on the disk (pointers to disk blocks containing data). + +.. note:: + Usually, the inode does not contain the file name. The name is stored by the :c:type:`dentry` entity. This way, an inode can have multiple names (hardlinks). + +Localization: +~~~~~~~~~~~~~ + +Like the superblock, the :c:type:`inode` has a disk correspondent. +The inodes on disk are generally grouped into a specialized area (inode area) separated from the data blocks area. In some file systems, the equivalents of the inodes are spread in the file system structure (FAT). +As a VFS entity, an inode is represented by the structure :c:type:`struct inode` and by the operations with it defined in the structure :c:type:`struct inode_operations`. + +Each inode is generally identified by a number.
On Linux, the ``-i`` argument of the ``ls`` command shows the inode number associated with each file: + +.. code-block:: console + + razvan@valhalla:~/school/so2/wiki$ ls -i + 1277956 lab10.wiki 1277962 lab9.wikibak 1277964 replace_lxr.sh + 1277954 lab9.wiki 1277958 link.txt 1277955 homework.wiki + +file +---- + +File is the component of the file system model that is closest to the user. +The structure exists only as a VFS entity in memory and has no physical correspondent on disk. + +While the inode abstracts a file on the disk, the file structure abstracts an open file. +From the point of view of the process, the file entity abstracts the file. From the point of view of the file system implementation, however, the inode is the entity that abstracts the file. + +The file structure maintains information such as: + + * file cursor position; + * file opening rights; + * pointer to the associated inode (eventually its index). + +Localization: +~~~~~~~~~~~~~ + + * The structure :c:type:`struct file` is the associated VFS entity, and the structure :c:type:`struct file_operations` represents the operations associated with it. + +dentry +------ + +The dentry (directory entry) associates an inode with a file name. + +Generally, a dentry structure contains two fields: + + * an integer that identifies the inode; + * a string representing its name. + +The dentry is a specific part of a path that can be a directory or a file. For example, for the path ``/bin/vi``, dentry objects will be created for ``/``, ``bin``, and ``vi`` (a total of 3 dentry objects). + + * the dentry has a correspondent on the disk, but the correspondence is not direct because each file system keeps the dentries in a specific way + * in VFS, the dentry entity is represented by the structure :c:type:`struct dentry` and the operations with it are defined in the :c:type:`struct dentry_operations` structure. + +.. 
_RegisterUnregisterSection: + +Register and unregister filesystems +=================================== + +In the current version, the Linux kernel supports about 50 file systems, including: + + * ext2/ ext4 + * reiserfs + * xfs + * fat + * ntfs + * iso9660 + * udf for CDs and DVDs + * hpfs + +On a single system, however, it is unlikely that there will be more than 5-6 file systems. For this reason, file systems (or, more correctly, file system types) are implemented as modules and can be loaded or unloaded at any time. + +In order to be able to dynamically load / unload a file system module, a file system registration / deregistration API is required. The structure describing a particular file system is :c:type:`struct file_system_type`: + + .. code-block:: c + + #include + + struct file_system_type { + const char *name; + int fs_flags; + struct dentry *(*mount) (struct file_system_type *, int, + const char *, void *); + void (*kill_sb) (struct super_block *); + struct module *owner; + struct file_system_type * next; + struct hlist_head fs_supers; + struct lock_class_key s_lock_key; + struct lock_class_key s_umount_key; + //... + }; + + * ``name`` is a string representing the name that will identify a file system (the argument passed to ``mount -t``). + * ``owner`` is ``THIS_MODULE`` for file systems implemented in modules, and ``NULL`` if they are written directly into the kernel. + * The ``mount`` function reads the superblock from the disk in memory when loading the file system. The function is unique to each file system. + * The ``kill_sb`` function releases the super-block from memory. + * ``fs_flags`` specifies the flags with which the file system must be mounted. An example of such flag is ``FS_REQUIRES_DEV`` that specifies to VFS that the file system needs a disk (it is not a virtual file system). + * ``fs_supers`` is a list containing all the superblocks associated with this file system. 
Since the same file system can be mounted multiple times, there will be a separate superblock for each mount. + +The *registration of a file system* into the kernel is generally performed in the module initialization function. For registration, the programmer will have to + + #. initialize a structure of type :c:type:`struct file_system_type` with the name, the flags, the function that implements the superblock reading operation and the reference to the structure that identifies the current module + #. call the :c:func:`register_filesystem` function. + +When unloading the module, you must unregister the file system by calling the :c:func:`unregister_filesystem` function. + +An example of registering a virtual file system is found in the code for ``ramfs``: + +.. code-block:: c + + static struct file_system_type ramfs_fs_type = { + .name = "ramfs", + .mount = ramfs_mount, + .kill_sb = ramfs_kill_sb, + .fs_flags = FS_USERNS_MOUNT, + }; + + static int __init init_ramfs_fs(void) + { + if (test_and_set_bit(0, &once)) + return 0; + return register_filesystem(&ramfs_fs_type); + } + +.. _FunctionsMountKillSBSection: + +Functions mount, kill_sb +------------------------ + +When mounting the file system, the kernel calls the mount function defined within the structure :c:type:`file_system_type`. The function makes a set of initializations and returns a dentry (the structure :c:type:`struct dentry`) that represents the mount point directory. 
Usually :c:func:`mount` is a simple function that calls one of the functions: + + * :c:func:`mount_bdev`, which mounts a file system stored on a block device + * :c:func:`mount_single`, which mounts a file system that shares an instance between all mount operations + * :c:func:`mount_nodev`, which mounts a file system that is not on a physical device + * :c:func:`mount_pseudo`, a helper function for pseudo-file systems (``sockfs``, ``pipefs``, generally file systems that can not be mounted) + +These functions get as parameter a pointer to a function :c:func:`fill_super` that will be called after the superblock initialization to finish its initialization by the driver. An example of such a function can be found in the ``fill_super`` section. + +When unmounting the file system, the kernel calls :c:func:`kill_sb`, which performs cleanup operations and invokes one of the functions: + + * :c:func:`kill_block_super`, which unmounts a file system on a block device + * :c:func:`kill_anon_super`, which unmounts a virtual file system (information is generated when requested) + * :c:func:`kill_litter_super`, which unmounts a file system that is not on a physical device (the information is kept in memory) + +An example for a file system without disk support is the :c:func:`ramfs_mount` function in the ``ramfs`` file system: + +.. code-block:: c + + struct dentry *ramfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) + { + return mount_nodev(fs_type, flags, data, ramfs_fill_super); + } + +An example for a file system from disk is the :c:func:`minix_mount` function in the ``minix`` file system: + +.. 
code-block:: c + + struct dentry *minix_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) + { + return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super); + } + +Superblock in VFS +================= + +The superblock exists both as a physical entity (entity on disk) and as a VFS entity (within the :c:type:`struct super_block` structure). +The superblock contains only metainformation and is used to write and read metadata from the disk (inodes, directory entries). +A superblock (and implicitly the :c:type:`struct super_block` structure) will contain information about the block device used, the list of inodes, a pointer to the inode of the file system root directory, and a pointer to the superblock operations. + +The :c:type:`struct super_block` structure +------------------------------------------ + +Part of the :c:type:`struct super_block` structure definition is presented below: + +.. code-block:: c + + struct super_block { + //... + dev_t s_dev; /* identifier */ + unsigned char s_blocksize_bits; /* block size in bits */ + unsigned long s_blocksize; /* block size in bytes */ + unsigned char s_dirt; /* dirty flag */ + loff_t s_maxbytes; /* max file size */ + struct file_system_type *s_type; /* filesystem type */ + struct super_operations *s_op; /* superblock methods */ + //... + unsigned long s_flags; /* mount flags */ + unsigned long s_magic; /* filesystem’s magic number */ + struct dentry *s_root; /* directory mount point */ + //... + char s_id[32]; /* informational name */ + void *s_fs_info; /* filesystem private info */ + }; + +The superblock stores global information for an instance of a file system: + * the physical device on which it resides + * block size + * the maximum size of a file + * file system type + * the operations it supports + * magic number (identifies the file system) + * the root directory ``dentry`` + +Additionally, a generic pointer (``void *``) stores the private data of the file system. 
+The superblock can be viewed as an abstract object to which its own data is added when there is a concrete implementation. + +.. _SuperblockSection: + +Superblock operations +--------------------- + +The superblock operations are described by the :c:type:`struct super_operations` structure: + +.. code-block:: c + + struct super_operations { + //... + int (*write_inode) (struct inode *, struct writeback_control *wbc); + struct inode *(*alloc_inode)(struct super_block *sb); + void (*destroy_inode)(struct inode *); + + void (*put_super) (struct super_block *); + int (*statfs) (struct dentry *, struct kstatfs *); + int (*remount_fs) (struct super_block *, int *, char *); + //... + }; + +The fields of the structure are function pointers with the following meanings: + + * ``write_inode``, ``alloc_inode``, ``destroy_inode`` write, allocate, respectively release resources associated with an inode and are described in the next lab + * ``put_super`` is called when the superblock is released at ``umount``; within this function, any resources (generally memory) from the file system's private data must be released; + * ``remount_fs`` is called when the kernel detects a remount attempt (mount flag ``MS_REMOUNT``); most of the time, this function must detect whether a switch from read-only to read-write or vice versa is attempted; this can be done simply because both the old flags (in ``sb->s_flags``) and the new flags (the ``flags`` argument) can be accessed; ``data`` is a pointer to the data sent by :c:func:`mount` that represents file system specific options; + * ``statfs`` is called when a ``statfs`` system call is done (try ``stat -f`` or ``df``); this call must fill the fields of the :c:type:`struct kstatfs` structure, as it is done, for example, in the :c:func:`ext4_statfs` function. + +..
_FillSuperSection: + +The :c:func:`fill_super` function +===================================== + +As specified, the :c:func:`fill_super` function is called to terminate the superblock initialization. This initialization involves filling the :c:type:`struct super_block` structure fields and the initialization of the root directory inode. + +An example of implementation is the :c:func:`ramfs_fill_super` function which is called to initialize the remaining fields in the superblock: + +.. code-block:: c + + #include + + #define RAMFS_MAGIC 0x858458f6 + + static const struct super_operations ramfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .show_options = ramfs_show_options, + }; + + static int ramfs_fill_super(struct super_block *sb, void *data, int silent) + { + struct ramfs_fs_info *fsi; + struct inode *inode; + int err; + + save_mount_options(sb, data); + + fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); + sb->s_fs_info = fsi; + if (!fsi) + return -ENOMEM; + + err = ramfs_parse_options(data, &fsi->mount_opts); + if (err) + return err; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_magic = RAMFS_MAGIC; + sb->s_op = &ramfs_ops; + sb->s_time_gran = 1; + + inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0); + sb->s_root = d_make_root(inode); + if (!sb->s_root) + return -ENOMEM; + + return 0; + } + + +The kernel provides generic function to implement operations with file system structures. +The :c:func:`generic_drop_inode` and :c:func:`simple_statfs` functions used in the above code are such functions and can be used to implement the drivers if their functionality is sufficient. + +The :c:func:`ramfs_fill_super` function in the above code fills some fields in the superblock, then reads the root inode and allocates the root dentry. 
+Reading the root inode is done in the :c:func:`ramfs_get_inode` function, and consists of allocating a new inode using :c:func:`new_inode` and initializing it. In order to free the inode, :c:func:`iput` is used, and :c:func:`d_make_root` is used to allocate the root dentry. + +An example implementation for a disk file system is the :c:func:`minix_fill_super` function in the minix file system. +The functionality for the disk file system is similar to that of the virtual file system, with the exception of using the buffer cache. +Also, the minix file system keeps private data using the :c:type:`struct minix_sb_info` structure. +A large part of this function deals with the initialization of these private data (not included in the code snippet above for clarity). +The private data is allocated using the :c:func:`kzalloc` function and stored in the ``s_fs_info`` field of the superblock structure. + +VFS functions typically get as arguments the superblock, an inode and/or a dentry that contain a pointer to the superblock so that these private data can be easily accessed. + +.. _BufferCacheSection: + +Buffer cache +============ + +Buffer cache is a kernel subsystem that handles caching (both read and write) blocks from block devices. +The base entity used by cache buffer is the :c:type:`struct buffer_head` structure. 
+The most important fields in this structure are: + + * ``b_data``, pointer to a memory area where the data was read from or where the data must be written to + * ``b_size``, buffer size + * ``b_bdev``, the block device + * ``b_blocknr``, the number of the block on the device that has been loaded or needs to be saved on the disk + * ``b_state``, the status of the buffer + +There are some important functions that work with these structures: + + * :c:func:`__bread`: reads a block with the given number and given size in a ``buffer_head`` structure; in case of success returns a pointer to the ``buffer_head`` structure, otherwise it returns ``NULL``; + * :c:func:`sb_bread`: does the same thing as the previous function, but the size of the read block is taken from the superblock, as well as the device from which the read is done; + * :c:func:`mark_buffer_dirty`: marks the buffer as dirty (sets the ``BH_Dirty`` bit); the buffer will be written to the disk at a later time (from time to time the ``bdflush`` kernel thread wakes up and writes the buffers to disk); + * :c:func:`brelse`: frees up the memory used by the buffer, after it has previously written the buffer on disk if needed; + * :c:func:`map_bh`: associates the buffer-head with the corresponding sector. + +Functions and useful macros +=========================== + +The superblock typically contains a map of occupied blocks (by inodes, dentries, data) in the form of a bitmap (vector of bits). To work with such maps, it is recommended to use the following functions: + + * :c:func:`find_first_zero_bit`, to find the first zero bit in a memory area. The size parameter means the number of bits in the search area; + * :c:func:`test_and_set_bit`, to set a bit and get the old value; + * :c:func:`test_and_clear_bit`, to clear a bit and get the old value; + * :c:func:`test_and_change_bit`, to invert the value of a bit and get the old value.
+ +The following macrodefinitions can be used to verify the type of an inode: + + * ``S_ISDIR`` (``inode->i_mode``) to check if the inode is a directory; + * ``S_ISREG`` (``inode->i_mode``) to check if the inode is a regular file (not a link or device file). + +Further reading +=============== + +#. Robert Love -- Linux Kernel Development, Second Edition -- Chapter + 12. The Virtual Filesystem +#. Understanding the Linux Kernel, 3rd edition - Chapter 12. The Virtual + Filesystem +#. `Linux Virtual File System (presentation)`_ +#. `Understanding Unix/Linux Filesystem`_ +#. `Creating Linux virtual filesystems`_ +#. `The Linux Documentation Project - VFS`_ +#. `The "Virtual File System" in Linux`_ +#. `A Linux Filesystem Tutorial`_ +#. `The Linux Virtual File System`_ +#. `Documentation/filesystems/vfs.txt`_ +#. `File systems sources`_ + +.. _Linux Virtual File System (presentation): http://www.coda.cs.cmu.edu/doc/talks/linuxvfs/ +.. _Understanding Unix/Linux Filesystem: http://www.cyberciti.biz/tips/understanding-unixlinux-file-system-part-i.html +.. _Creating Linux virtual filesystems: http://lwn.net/Articles/57369/ +.. _The Linux Documentation Project - VFS: http://www.tldp.org/LDP/tlk/fs/filesystem.html +.. _The "Virtual File System" in Linux: http://www.linux.it/~rubini/docs/vfs/vfs.html +.. _A Linux Filesystem Tutorial: http://inglorion.net/documents/tutorials/tutorfs/ +.. _The Linux Virtual File System: http://www.win.tue.nl/~aeb/linux/lk/lk-8.html +.. _Documentation/filesystems/vfs.txt: http://lxr.free-electrons.com/source/Documentation/filesystems/vfs.txt +.. _File systems sources: http://lxr.free-electrons.com/source/fs/ + +Exercises +========= + +.. include:: exercises-summary.hrst +.. |LAB_NAME| replace:: filesystems + + +myfs +---- + +To begin, we plan to get familiar with the interface exposed by the Linux kernel and the Virtual File System (VFS) component. That is why, for the beginning, we will work with a simple, virtual file system (i.e. 
without physical disk support). The file system is called ``myfs``. + +For this we will access the ``myfs/`` subdirectory in the laboratory skeleton. We will implement the superblock operations within this lab, and the next lab will continue with the inode operations. + +1. Register and unregister the myfs file system +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The first step in working with the file system is to register and unregister it. We want to do this for the file system described in ``myfs.c``. Check the file contents and follow the directions marked with ``TODO 1``. + +The steps you need to take are described in the section :ref:`RegisterUnregisterSection`. Use the ``"myfs"`` string for the file system name. + +.. note:: + Within the file system structure, use the ``myfs_mount`` function present in the code skeleton to fill the superblock (done when mounting). In ``myfs_mount`` call the function specific to a file system without disk support. As an argument for the specific mount function, use the function of type ``fill_super`` defined in the code skeleton. You can review the :ref:`FunctionsMountKillSBSection` section. + + To destroy the superblock (done at unmounting) use ``kill_litter_super``, also a function specific to a file system without disk support. The function is already implemented, you need to fill it in the :c:type:`struct file_system_type` structure. + + +After completing the sections marked with ``TODO 1`` , compile the module, copy it to the QEMU virtual machine, and start the virtual machine. Load the kernel module and then check the presence of the ``myfs`` file system within the ``/proc/filesystems`` file. + +At the moment, the file system is only registered, it does not expose operations to use it. If we try to mount it, the operation will fail. To try mounting, we create mount point ``/mnt/myfs/``. + +.. code-block:: console + + # mkdir -p /mnt/myfs + +and then we use the ``mount`` command: + +.. 
code-block:: console + + # mount -t myfs none /mnt/myfs + +The error message we get shows that we have not implemented the operations that work on the superblock. We will have to implement the operations on the superblock and initialize the root inode. We will do this further. + +.. note:: + + The ``none`` argument sent to the ``mount`` command indicates that we do not have a device from which to mount, the file system being a virtual one. Similarly, this is how the ``procfs`` or ``sysfs`` filesystems are mounted on Linux systems. + + +2. Completing myfs superblock +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To be able to mount the file system, we need to fill its superblock's fields, that is, a generic VFS structure of type :c:type:`struct super_block`. +We will fill out the structure within the :c:func:`myfs_fill_super` function; the superblock is represented by the variable ``sb`` passed as an argument to the function. +Follow the hints marked with ``TODO 2``. + +.. note:: + + To fill the ``myfs_fill_super`` function, you can start from the example in the section :ref:`FillSuperSection`. + + For the superblock structure fields, use the macros defined within the code skeleton wherever possible. + + +The ``s_op`` field in the superblock structure must be initialized to the superblock operations structures (type :c:type:`struct super_operations`). You need to define such a structure. + +For information on defining the :c:type:`struct super_operations` structure and filling the superblock, see the section :ref:`SuperblockSection`. + +.. note:: + + Initialize the ``drop_inode`` and ``statfs`` fields of :c:type:`struct super_operations` structure. + + +Although the superblock will be properly initialized at this time, the mount operation will continue to fail. +In order for the operation to be successfully completed, the root inode will have to be initialized, which we will do for the next exercise. + + +3. 
Initialize myfs root inode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The root inode is the inode of the file system root directory (i.e. ``/``). +Initialization is done when the file system is mounted. +The ``myfs_fill_super`` function, called at mount, is the one that calls the ``myfs_get_inode`` function that creates and initializes an inode. +Typically, this function is used to create and initialize all inodes; In this exercise, however, we will only create the root inode. + +The :c:type:`inode` is allocated inside the ``myfs_get_inode`` function (local variable ``inode``, allocated using the :c:func:`new_inode` function call). + +To successfully complete mounting the file system, you will need to fill the ``myfs_get_inode`` function. Follow directions marked with ``TODO 3``. A starting point is the `ramfs_get_inode `_ function. + +.. note:: + + To initialize ``uid``, ``gid`` and ``mode`` , you can use the :c:func:`inode_init_owner` function as it is used in :c:func:`ramfs_get_inode`. + When you call :c:func:`inode_init_owner`, use ``NULL`` as the second parameter because there is no parent directory for the created inode. + + Initialize the ``i_atime``, ``i_ctime``, and ``i_mtime`` of the VFS inode to the value returned by the :c:func:`current_time` function. + + You will need to initialize the operations for the inode of type directory. To do this, follow the steps: + + #. Check if this is a directory type inode using the ``S_ISDIR`` macro. + #. For the ``i_op`` and ``i_fop`` fields, use kernel functions that are already implemented: + + * for ``i_op``: :c:type:`simple_dir_inode_operations`. + * for ``i_fop``: :c:type:`simple_dir_operations` + + #. Increase the number of links for the directory using the :c:func:`inc_nlink` function. + +4. Test myfs mount and unmount +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Now we can mount the filesystem. 
+Follow the steps above to compile the kernel module, copy to the virtual machine, and start the virtual machine, then insert the kernel module, create the mount point ``/mnt/myfs/``, and mount the file system. +We verify that the file system was mounted by inspecting the ``/proc/mounts`` file. + +What inode number does the ``/mnt/myfs`` directory have? Why? + +.. note:: + + To display the inode number of a directory, use the command: + + .. code-block:: console + + ls -di /path/to/directory + + where ``/path/to/directory/`` is the path to the directory whose inode number we want to display. + +We check myfs file system statistics using the following command: + +.. code-block:: console + + stat -f /mnt/myfs + +We want to see what the mount point ``/mnt/myfs`` contains and if we can create files. +For this we run the commands: + +.. code-block:: console + + # ls -la /mnt/myfs + # touch /mnt/myfs/a.txt + +We can see that we can not create the ``a.txt`` file on the file system. +This is because we have not implemented the operations to work with inodes in the :c:type:`struct super_operations` structure. +We will implement these operations within the next lab. + +Unmount the file system using the command + +.. code-block:: console + + umount /mnt/myfs + +Unload the kernel module corresponding to the file system as well. + +.. note:: + + To test the entire functionality, you can use the ``test-myfs.sh`` script: + + .. code-block:: console + + ./test-myfs.sh + + The script is copied to the virtual machine using ``make copy`` only if it is executable: + + .. code-block:: console + + student@workstation:~/linux/tools/labs$ chmod +x skels/filesystems/myfs/test-myfs.sh + + +.. note:: + + The statistics displayed for the file system are minimal because the information is provided by the simple_statfs function. + +minfs +----- + +Next, we will implement the basics of a very simple file system, called ``minfs``, with disk support. 
+We will use a disk in the virtual machine that we will format and mount with the ``minfs`` filesystem. + +For this we will access the ``minfs/kernel`` directory from the laboratory skeleton and work with the code in ``minfs.c``. +Just like ``myfs`` we will not implement the operations for working with inodes. We will limit ourselves to working with the superblock and, therefore, mounting. +The rest of the operations will be implemented in the next lab. + +Follow the diagram below to clarify the role of structures within the ``minfs`` file system. + +.. image:: minfs.png + +1. Register and unregister the minfs file system +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + Before solving the exercise, we need to add a disk to the virtual machine. To do this, generate a file that we will use as the disk image using the following command: + + .. code-block:: console + + dd if=/dev/zero of=mydisk.img bs=1M count=100 + + and add the ``-drive file=qemu/mydisk.img,if=virtio,format=raw`` argument to the ``qemu`` command in ``qemu/Makefile`` (in the ``QEMU_OPTS`` variable). + The new argument for the ``qemu`` command must be added after the one for the existing disk (``YOCTO_IMAGE``). + +To register and unregister the file system, you will need to fill in the ``minfs_fs_type`` structure and the ``minfs_mount`` function in ``minfs.c``. Follow the directions marked with ``TODO 1``. + +.. note:: + + In the file system structure, for mount, use the ``minfs_mount`` function from the code skeleton. + In this function, call the function to mount a file system with disk support (See the :ref:`FunctionsMountKillSBSection` section. Use :c:func:`mount_bdev`). + Choose the most suitable function for destroying the superblock (done at unmount); keep in mind that it is a file system with disk support. Use the :c:func:`kill_block_super` function. + + Initialize the ``fs_flags`` field of the :c:type:`minfs_fs_type` structure with the appropriate value for a file system with disk support.
See the section :ref:`RegisterUnregisterSection`. + + The function for filling the superblock is ``minfs_fill_super``. + +After completing the sections marked with ``TODO 1``, compile the module, copy it into the QEMU virtual machine, and start the virtual machine. +Load the kernel module and then check the presence of the ``minfs`` file system within the ``/proc/filesystems`` file. + +To test the mounting of the ``minfs`` file system we will need to format the disk with its structure. Formatting requires the ``mkfs.minfs`` formatting tool from the ``minfs/user`` directory. The utility is automatically compiled when running ``make build`` and copied to the virtual machine at ``make copy``. + +After compiling, copying, and starting the virtual machine, format the ``/dev/vdb`` using the formatting utility: + +.. code-block:: console + + # ./mkfs.minfs /dev/vdb + +Load the kernel module: + +.. code-block:: console + + # insmod minfs.ko + +Create mount point ``/mnt/minfs/``: + +.. code-block:: console + + # mkdir -p /mnt/minfs/ + +and mount the filesystem + +.. code-block:: console + + # mount -t minfs /dev/vdX /mnt/minfs/ + +The operation fails because the root inode is not initialized. + +2. Completing minfs superblock +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To be able to mount the file system, you will need to fill the superblock (i.e a structure with type :c:type:`struct super_block`) within the ``minfs_fill_super`` function; it is the ``s`` argument of the function. +The structure of operations on the superblock is already defined: ``minfs_ops``. +Follow the directions marked with ``TODO 2``. You can also follow the implementation of the `minix_fill_super `_ function. + +.. note:: + + Some structures are found in the header file ``minfs.h``. + + For information on working with buffers, go to the :ref:`BufferCacheSection` section. + + Read the first block on the disk (block with index 0). + To read the block, use the :c:func:`sb_bread` function. 
+ Cast the read data (the ``b_data`` field in the :c:type:`struct buffer_head` structure) to the structure storing the ``minfs`` superblock information on the disk: :c:type:`struct minfs_super_block`, defined in the source code file. + + Structure :c:type:`struct minfs_super_block` holds file system-specific information that is not found in the :c:type:`struct super_block` generic structure (in this case only version). + Those additional information (found in :c:type:`struct minfs_super_block` (on disk) but not in :c:type:`struct super_block` (VFS)) will be stored in the :c:type:`struct minfs_sb_info` structure. + +To check the functionality, we need a function for reading the root inode. +For the time being, use the ``myfs_get_inode`` function from ``myfs`` file system exercises. +Copy the function into the source code and call it the same as you did for myfs. +The second argument when calling the ``myfs_get_inode`` function is the inode creation permissions, similar to the virtual file system exercise (myfs). + +Validate the implementation by executing the commands from the previous exercise. + +3. Creating and destroying minfs inodes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For mounting, we need to initialize the root inode, and to get the root inode, we need to implement the functions to work with inodes. +That is, you need to implement the ``minfs_alloc_inode`` and ``minfs_destroy_inode`` functions. +Follow the directions marked with ``TODO 3``. You can use the :c:func:`minix_alloc_inode` and :c:func:`minix_destroy_inode` functions as a model. + +For the implementation, look at the macros and structures in the ``minfs.h`` header file. + +.. note:: + + For memory allocation/deallocation in ``minfs_alloc_inode`` and ``minfs_destroy_inode``, we recommend using :c:func:`kzalloc` and :c:func:`kfree`. + + In ``minfs_alloc_inode`` allocate structures with type :c:type:`struct minfs_inode_info`, but only return structures with type :c:type:`struct inode`, i.e. 
return those given by the ``vfs_inode`` field. + + In the ``minfs_alloc_inode`` function, call :c:func:`inode_init_once` to initialize the inode. + + In the ``minfs_destroy_inode`` function, you can access the structure with type :c:type:`struct minfs_inode_info` using the ``container_of`` macro. + +.. note:: + + In this exercise, you have implemented the ``minfs_alloc_inode`` and ``minfs_destroy_inode`` functions, but they are not yet called. The correctness of the implementation will be checked at the end of the next exercise. + +4. Initialize minfs root inode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Initializing the root inode is required in order to mount the file system. +For this, you will need to complete the ``minfs_ops`` structure with the ``minfs_alloc_inode`` and ``minfs_destroy_inode`` functions and fill the ``minfs_iget`` function. + +The ``minfs_iget`` function is the function called to allocate a VFS inode (i.e. :c:type:`struct inode`) and fill it with minfs inode-specific information from the disk (i.e. ``struct minfs_inode``). + +Follow the directions marked with ``TODO 4``. +Fill out the ``alloc_inode`` and ``destroy_inode`` fields of the :c:type:`struct super_operations` structure with the functions implemented in the previous step. + +The information about the root inode is found in the second block on the disk (the inode with index 1). +Make ``minfs_iget`` read the root minfs inode from the disk (:c:type:`struct minfs_inode`) and fill in the VFS inode (:c:type:`struct inode`). + +In the ``minfs_fill_super`` function, replace the ``myfs_get_inode`` call with the ``minfs_iget`` function call. + +.. note:: + To implement the ``minfs_iget`` function, follow the implementation of :c:func:`V1_minix_iget` (in ``fs/minix/inode.c``). + To read a block, use the :c:func:`sb_bread` function. + Cast the read data (the ``b_data`` field of the :c:type:`struct buffer_head` structure) to the minfs inode from the disk (:c:type:`struct minfs_inode`).
+ + The ``i_uid``, ``i_gid``, ``i_mode``, ``i_size`` must be filled in the VFS inode with the values in the minfs inode structure read from disk. + To initialize the ``i_uid`` and ``i_gid`` fields, use the functions :c:func:`i_uid_write` and :c:func:`i_gid_write`. + + Initialize the ``i_atime``, ``i_ctime``, and ``i_mtime`` fields of the VFS inode to the value returned by the :c:func:`current_time` function. + + You will need to initialize the operations for the inode with type directory. To do this, follow the steps: + + #. Check if this is a directory type inode using the ``S_ISDIR`` macro. + #. For the ``i_op`` and ``i_fop`` fields, use the operations structures already implemented in the kernel: + + * for ``i_op``: :c:type:`simple_dir_inode_operations`. + * for ``i_fop``: :c:type:`simple_dir_operations` + + #. Increment the number of links for the directory using the :c:func:`inc_nlink` function. + +5. Testing of minfs mount and unmount +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Now we can mount the filesystem. +Follow the steps above to compile the kernel module, copy to the virtual machine, start the virtual machine, and then insert the kernel module, create mount point ``/mnt/minfs/`` and mount the file system. +We verify that the file system was mounted by investigating the ``/proc/mounts`` file. + +We check that everything is fine by listing the mount point contents ``/mnt/minfs/``: + +.. code-block:: console + + # ls /mnt/minfs/ + +After mount and verification, unmount the file system and unload the module from the kernel. + +.. note:: + Alternatively, to test the entire functionality, you can use the ``test-minfs.sh`` script: + + .. code-block:: console + + # ./test-minfs.sh + + The script is copied to the virtual machine when running the ``make copy`` command only if it is executable: + + ..
code-block:: console + + student@workstation:~/linux/tools/labs$ chmod +x skels/filesystems/minfs/user/test-minfs.sh + diff --git a/Documentation/teaching/labs/filesystems_part2.rst b/Documentation/teaching/labs/filesystems_part2.rst new file mode 100644 index 00000000000000..90d08995b13207 --- /dev/null +++ b/Documentation/teaching/labs/filesystems_part2.rst @@ -0,0 +1,1075 @@ +============================ +File system drivers (Part 2) +============================ + +Lab objectives +============== + + * Improving the knowledge about inode, file and dentry. + * Acquiring knowledge about adding support for working with regular files and directories in VFS (*Virtual File System*). + * Acquiring knowledge about the internal implementation of a file system. + +Inode +===== + +The inode is an essential component of a UNIX file system and, at the same time, an important component of VFS. An inode is a metadata (it has information about information). +An inode uniquely identifies a file on disk and holds information about it (uid, gid, access rights, access times, pointers to data blocks, etc.). +An important aspect is that an inode does not have information about the file name (it is retained by the associated :c:type:`struct dentry` structure). + +The inode refers to a file on the disk. To refer an open file (associated with a file descriptor within a process), the :c:type:`struct file` structure is used. +An inode can have any number of (zero or more) ``file`` structures associated (multiple processes can open the same file, or a process can open the same file several times). + +Inode exists both as a VFS entity (in memory) and as a disk entity (for UNIX, HFS, NTFS, etc.). +The inode in VFS is represented by the structure :c:type:`struct inode`. +Like the other structures in VFS, :c:type:`struct inode` is a generic structure that covers the options for all supported file types, even those that do not have an associated disk entity (such as FAT). 
+ +The inode structure +------------------- + +The inode structure is the same for all file systems. In general, file systems also have private information. These are referenced through the ``i_private`` field of the structure. +Conventionally, the structure that keeps that particular information is called ``_inode_info``, where ``fsname`` represents the file system name. For example, minix and ext4 filesystems store particular information in structures :c:type:`struct minix_inode_info`, or :c:type:`struct ext4_inode_info`. + +Some of the important fields of :c:type:`struct inode` are: + + * ``i_sb`` : The superblock structure of the file system the inode belongs to. + * ``i_rdev``: the device on which this file system is mounted + * ``i_ino`` : the number of the inode (uniquely identifies the inode within the file system) + * ``i_blkbits``: number of bits used for the block size == log\ :sub:`2`\ (block size) + * ``i_mode``, ``i_uid``, ``i_gid``: access rights, uid, gid + + * ``i_size``: file/directory/etc. size in bytes + * ``i_mtime``, ``i_atime``, ``i_ctime``: change, access, and creation time + * ``i_nlink``: the number of names entries (dentries) that use this inode; for file systems without links (either hard or symbolic) this is always set to 1 + * ``i_blocks``: the number of blocks used by the file (all blocks, not just data); this is only used by the quota subsystem + * ``i_op``, ``i_fop``: pointers to operations structures: :c:type:`struct inode_operations` and :c:type:`struct file_operations`; ``i_mapping->a_ops`` contains a pointer to :c:type:`struct address_space_operations`. + * ``i_count``: the inode counter indicating how many kernel components use it. 
+ +Some functions that can be used to work with inodes: + + * :c:func:`new_inode`: creates a new inode, sets the ``i_nlink`` field to 1 and initializes ``i_blkbits``, ``i_sb`` and ``i_dev``; + * :c:func:`insert_inode_hash`: adds the inode to the hash table of inodes; an interesting effect of this call is that the inode will be written to the disk if it is marked as dirty; + + .. warning:: + + An inode created with :c:func:`new_inode` is not in the hash table, and unless you have serious reasons not to, you must enter it in the hash table; + + * :c:func:`mark_inode_dirty`: marks the inode as dirty; at a later moment, it will be written to the disk; + * :c:func:`iget_locked`: loads the inode with the given number from the disk, if it is not already loaded; + * :c:func:`unlock_new_inode`: used in conjunction with :c:func:`iget_locked`, releases the lock on the inode; + * :c:func:`iput`: tells the kernel that the work on the inode is finished; if no one else uses it, it will be destroyed (after being written to the disk if it is marked as dirty); + * :c:func:`make_bad_inode`: tells the kernel that the inode cannot be used; it is generally used from the function that reads the inode when the inode could not be read from the disk, being invalid. + +Inode operations +---------------- + +Getting an inode +^^^^^^^^^^^^^^^^ + +One of the main inode operations is obtaining an inode (the :c:type:`struct inode` in VFS). +Until version ``2.6.24`` of the Linux kernel, the developer defined a ``read_inode`` function. +Starting with version ``2.6.25``, the developer must define a ``<fsname>_iget`` function, where ``<fsname>`` is the name of the file system. +This function is responsible for finding the VFS inode if it exists or creating a new one and filling it with the information from the disk. + +Generally, this function will call :c:func:`iget_locked` to get the inode structure from VFS. 
If the inode is newly created then it will need to read the inode from the disk (using :c:func:`sb_bread`) and fill in the useful information. + +An example of such a function is :c:func:`minix_iget`: + +.. code-block:: c + + static struct inode *V1_minix_iget(struct inode *inode) + { + struct buffer_head * bh; + struct minix_inode * raw_inode; + struct minix_inode_info *minix_inode = minix_i(inode); + int i; + + raw_inode = minix_V1_raw_inode(inode->i_sb, inode->i_ino, &bh); + if (!raw_inode) { + iget_failed(inode); + return ERR_PTR(-EIO); + ... + } + + struct inode *minix_iget(struct super_block *sb, unsigned long ino) + { + struct inode *inode; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + if (INODE_VERSION(inode) == MINIX_V1) + return V1_minix_iget(inode); + ... + } + +The :c:func:`minix_iget` function gets the VFS inode using :c:func:`iget_locked`. +If the inode already exists (not new == the ``I_NEW`` flag is not set) the function returns. +Otherwise, the function calls the :c:func:`V1_minix_iget` function that will read the inode from the disk using :c:func:`minix_V1_raw_inode` and then complete the VFS inode with the read information. + +Superoperations +^^^^^^^^^^^^^^^ + +Many of the superoperations (components of the :c:type:`struct super_operations` structure used by the superblock) are used when working with inodes. These operations are described next: + + * ``alloc_inode``: allocates an inode. + Usually, this function allocates a :c:type:`struct <fsname>_inode_info` structure and performs basic VFS inode initialization (using :c:func:`inode_init_once`); + minix uses for allocation the :c:func:`kmem_cache_alloc` function that interacts with the SLAB subsystem. + For each allocation, the cache constructor is called, which in the case of minix is the :c:func:`init_once` function. 
+ Alternatively, :c:func:`kmalloc` can be used, in which case the :c:func:`inode_init_once` function should be called. + The :c:func:`alloc_inode` function will be called by the :c:func:`new_inode` and :c:func:`iget_locked` functions. + * ``write_inode`` : saves/updates the inode received as a parameter on disk; to update the inode, though inefficient, for beginners it is recommended to use the following sequence of operations: + + * load the inode from the disk using the :c:func:`sb_bread` function; + * modify the buffer according to the saved inode; + * mark the buffer as dirty using :c:func:`mark_buffer_dirty`; the kernel will then handle writing it to the disk; + * an example is the :c:func:`minix_write_inode` function in the ``minix`` file system + + * ``evict_inode``: removes any information about the inode with the number received in the ``i_ino`` field from the disk and memory (both the inode on the disk and the associated data blocks). This involves performing the following operations: + + * delete the inode from the disk; + * update the disk bitmaps (if any); + * delete the inode from the page cache by calling :c:func:`truncate_inode_pages`; + * delete the inode from memory by calling :c:func:`clear_inode` ; + * an example is the :c:func:`minix_evict_inode` function from the minix file system. + + * ``destroy_inode``: releases the memory occupied by the inode + +inode_operations +^^^^^^^^^^^^^^^^ + +The inode operations are described by the :c:type:`struct inode_operations` structure. + +Inodes are of several types: file, directory, special file (pipe, fifo), block device, character device, link etc. +For this reason, the operations that an inode needs to implement are different for each type of inode. +Below are detailed operations for a :ref:`file type inode <FileInodes>` and a :ref:`directory inode <DirectoryInodes>`. + +The operations of an inode are initialized and accessed using the ``i_op`` field of the structure :c:type:`struct inode`. 
+ +The file structure +================== + +The ``file`` structure corresponds to a file opened by a process and exists only in memory, being associated with an inode. +It is the closest VFS entity to user-space; the structure fields contain familiar information of a user-space file (access mode, file position, etc.) and the operations with it are performed by known system calls (``read``, ``write``, etc.). + +The file operations are described by the :c:type:`struct file_operations` structure. + +The file operations for a file system are initialized using the ``i_fop`` field of the :c:type:`struct inode` structure. +When opening a file, the VFS initializes the ``f_op`` field of the :c:type:`struct file` structure with the address stored in ``inode->i_fop``, such that subsequent system calls use the value stored in ``file->f_op``. + +.. _FileInodes: + +Regular files inodes +==================== + +To work with the inode, the ``i_op`` and ``i_fop`` fields of the inode structure must be filled in. +The type of the inode determines the operations that it needs to implement. + +.. _FileOperations: + +Regular files inode operations +------------------------------ + +In the ``minix`` file system, the ``minix_file_inode_operations`` structure is defined for the operations on an inode and for the file operations the ``minix_file_operations`` structure is defined: + +.. code-block:: c + + const struct file_operations minix_file_operations = { + .llseek = generic_file_llseek, + .read_iter = generic_file_read_iter, + //... + .write_iter = generic_file_write_iter, + //... + .mmap = generic_file_mmap, + //... + }; + + const struct inode_operations minix_file_inode_operations = { + .setattr = minix_setattr, + .getattr = minix_getattr, + }; + + //... + if (S_ISREG(inode->i_mode)) { + inode->i_op = &minix_file_inode_operations; + inode->i_fop = &minix_file_operations; + } + //... 
+ + + +The functions :c:func:`generic_file_llseek`, :c:func:`generic_file_mmap`, :c:func:`generic_file_read_iter` and :c:func:`generic_file_write_iter` are implemented in the kernel. + +For simple file systems, only the truncation operation (``truncate`` system call) must be implemented. +Although initially there was a dedicated operation, starting with 3.14 the operation was embedded in ``setattr``: if the passed size is different from the current size of the inode, then a truncate operation must be performed. +An example of implementing this check is in the :c:func:`minix_setattr` function: + +.. code-block:: c + + static int minix_setattr(struct dentry *dentry, struct iattr *attr) + { + struct inode *inode = d_inode(dentry); + int error; + + error = setattr_prepare(dentry, attr); + if (error) + return error; + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = inode_newsize_ok(inode, attr->ia_size); + if (error) + return error; + + truncate_setsize(inode, attr->ia_size); + minix_truncate(inode); + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; + } + +The truncate operation involves: + + * freeing blocks of data on the disk that are now extra (if the new size is smaller than the old one) or allocating new blocks (for cases where the new size is larger); + * updating disk bit maps (if used); + * updating the inode; + * filling with zero the space that was left unused from the last block using the :c:func:`block_truncate_page` function. + +An example of the implementation of the truncate operation is the :c:func:`minix_truncate` function in the ``minix`` file system. + +.. _AddressSpaceOperations: + +Address space operations +------------------------ + +There is a close link between the address space of a process and files: the execution of the programs is done almost exclusively by mapping the file into the process address space. 
+Because this approach works very well and is quite general, it can also be used for regular system calls such as ``read`` and ``write``. + +The structure that describes the address space is :c:type:`struct address_space`, and the operations with it are described by the structure :c:type:`struct address_space_operations`. To initialize the address space operations, fill ``inode->i_mapping->a_ops`` of the file type inode. + +An example is the ``minix_aops`` structure in the minix file system: + +.. code-block:: c + + static const struct address_space_operations minix_aops = { + .readpage = minix_readpage, + .writepage = minix_writepage, + .write_begin = minix_write_begin, + .write_end = generic_write_end, + .bmap = minix_bmap + }; + + //... + if (S_ISREG(inode->i_mode)) { + inode->i_mapping->a_ops = &minix_aops; + } + //... + +The :c:func:`generic_write_end` function is already implemented. +Most of the specific functions are very easy to implement, as follows: + +.. code-block:: c + + static int minix_writepage(struct page *page, struct writeback_control *wbc) + { + return block_write_full_page(page, minix_get_block, wbc); + } + + static int minix_readpage(struct file *file, struct page *page) + { + return block_read_full_page(page, minix_get_block); + } + + static void minix_write_failed(struct address_space *mapping, loff_t to) + { + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, inode->i_size); + minix_truncate(inode); + } + } + + static int minix_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) + { + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, + minix_get_block); + if (unlikely(ret)) + minix_write_failed(mapping, pos + len); + + return ret; + } + + static sector_t minix_bmap(struct address_space *mapping, sector_t block) + { + return generic_block_bmap(mapping, block, minix_get_block); + } + 
+All that needs to be done is to implement :c:func:`minix_get_block`, which has to translate a block of a file into a block on the device. +If the flag ``create`` received as a parameter is set, a new block must be allocated. +In case a new block is created, the bit map must be updated accordingly. +To notify the kernel not to read the block from the disk, ``bh`` must be marked with :c:func:`set_buffer_new`. The buffer must be associated with the block through :c:func:`map_bh`. + +Dentry structure +================ + +Directory operations use the :c:type:`struct dentry` structure. +Its main task is to make links between inodes and filenames. +The important fields of this structure are presented below: + +.. code-block:: c + + struct dentry { + //... + struct inode *d_inode; /* associated inode */ + //... + struct dentry *d_parent; /* dentry object of parent */ + struct qstr d_name; /* dentry name */ + //... + + struct dentry_operations *d_op; /* dentry operations table */ + struct super_block *d_sb; /* superblock of file */ + void *d_fsdata; /* filesystem-specific data */ + //... + }; + +The meaning of the fields: + + * ``d_inode``: the inode referenced by this dentry; + * ``d_parent``: the dentry associated with the parent directory; + * ``d_name``: a :c:type:`struct qstr` structure that contains the fields ``name`` and ``len`` (the name and the length of the name). + * ``d_op``: operations with dentries, represented by the :c:type:`struct dentry_operations` structure. + The kernel implements default operations so there is no need to (re)implement them. Some file systems can do optimizations based on the specific structure of the dentries. + * ``d_fsdata``: field reserved for the file system that implements dentry operations; + +Dentry operations +----------------- + +The operations most commonly applied to dentries are: + + * ``d_make_root``: allocates the root dentry. 
It is generally used in the function that is called to read the superblock (``fill_super``), which must initialize the root directory. + So the root inode is obtained from the superblock and is used as an argument to this function, to fill the ``s_root`` field from the :c:type:`struct super_block` structure. + * ``d_add``: associates a dentry with an inode; the dentry received as a parameter in the calls discussed above signifies the entry (name, length) that needs to be created. This function will be used when creating/loading a new inode that does not have a dentry associated with it and has not yet been introduced to the hash table of inodes (at ``lookup``); + * ``d_instantiate``: The lighter version of the previous call, in which the dentry was previously added in the hash table. + +.. warning:: + + ``d_instantiate`` must be used to implement create calls (``mkdir``, ``mknod``, ``rename``, ``symlink``) and NOT ``d_add``. + +.. _DirectoryInodes: + +Directory inodes operations +=========================== + +The operations for directory type inodes have a higher complexity level than the ones for files. +The developer must define operations for inodes and operations for files. +In ``minix``, these operations are defined in :c:type:`minix_dir_inode_operations` and :c:type:`minix_dir_operations`: + +.. code-block:: c + + struct inode_operations minix_dir_inode_operations = { + .create = minix_create, + .lookup = minix_lookup, + .link = minix_link, + .unlink = minix_unlink, + .symlink = minix_symlink, + .mkdir = minix_mkdir, + .rmdir = minix_rmdir, + .mknod = minix_mknod, + //... + }; + + struct file_operations minix_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate = minix_readdir, + //... + }; + + //... + if (S_ISDIR(inode->i_mode)) { + inode->i_op = &minix_dir_inode_operations; + inode->i_fop = &minix_dir_operations; + inode->i_mapping->a_ops = &minix_aops; + } + //... 
+ +The only function already implemented is :c:func:`generic_read_dir`. + +The functions that implement the operations on directory inodes are the ones described below. + +Creating an inode +----------------- + +The inode creation function is indicated by the field ``create`` in the ``inode_operations`` structure. +In the minix case, the function is :c:func:`minix_create`. +This function is called by the ``open`` and ``creat`` system calls. Such a function performs the following operations: + + #. Introduces a new entry into the physical structure on the disk; the update of the bit maps on the disk must not be forgotten. + #. Configures access rights to those received as a parameter. + #. Marks the inode as dirty with the :c:func:`mark_inode_dirty` function. + #. Instantiates the directory entry (``dentry``) with the ``d_instantiate`` function. + +Creating a directory +-------------------- + +The directory creation function is indicated by the ``mkdir`` field in the ``inode_operations`` structure. +In the minix case, the function is :c:func:`minix_mkdir`. +This function is called by the ``mkdir`` system call. Such a function performs the following operations: + + #. Calls :c:func:`minix_create`. + #. Allocates a data block for the directory. + #. Creates the ``"."`` and ``".."`` entries. + +Creating a link +--------------- + +The link creation function (hard link) is indicated by the ``link`` field in the ``inode_operations`` structure. +In the minix case, the function is :c:func:`minix_link`. +This function is called by the ``link`` system call. Such a function performs the following operations: + + * Binds the new dentry to the inode. + * Increments the ``i_nlink`` field of the inode. + * Marks the inode as dirty using the :c:func:`mark_inode_dirty` function. + +Creating a symbolic link +------------------------ + +The symbolic link creation function is indicated by the ``symlink`` field in the ``inode_operations`` structure. 
+In the minix case, the function is :c:func:`minix_symlink`. +The operations to be performed are similar to ``minix_link`` with the differences being given by the fact that a symbolic link is created. + +Deleting a link +--------------- + +The link delete function (hard link) is indicated by the ``unlink`` field in the ``inode_operations`` structure. +In the minix case, the function is :c:func:`minix_unlink`. +This function is called by the ``unlink`` system call. Such a function performs the following operations: + + #. Deletes the directory entry given as a parameter from the physical disk structure. + #. Decrements the ``i_nlink`` counter of the inode to which the entry points (otherwise the inode will never be deleted). + +Deleting a directory +-------------------- + +The directory delete function is indicated by the ``rmdir`` field in the ``inode_operations`` structure. +In the minix case, the function is :c:func:`minix_rmdir`. +This function is called by the ``rmdir`` system call. +Such a function performs the following operations: + + #. Performs the operations done by ``minix_unlink``. + #. Ensures that the directory is empty; otherwise, returns ``ENOTEMPTY``. + #. Also deletes the data blocks. + +Searching for an inode in a directory +------------------------------------- + +The function that searches for an entry in a directory and extracts the inode is indicated by the ``lookup`` field in the ``inode_operations`` structure. +In the minix case, the function is ``minix_lookup``. +This function is called indirectly when information about the inode associated with an entry in a directory is needed. +Such a function performs the following operations: + + #. Searches the directory indicated by ``dir`` for the entry having the name ``dentry->d_name.name``. + #. If the entry is found, it will return ``NULL`` and associate the inode with the name using the :c:func:`d_add` function. + #. Otherwise, returns ``ERR_PTR``. 
+ +Iterating through entries in a directory +---------------------------------------- + +The function which iterates through the entries in a directory (lists the directory contents) is indicated by the field ``iterate`` in the ``struct file_operations`` structure. +In the minix case, the function is ``minix_readdir``. +This function is called by the ``readdir`` system call. + +The function returns either all entries in the directory or just a part of them when the buffer allocated for them is not large enough. +A call of this function can return: + + * a number equal to the existing number of entries if there is enough space in the corresponding user space buffer; + * a number smaller than the actual number of entries, as much as there was space in the corresponding user space buffer; + * ``0``, when there are no more entries to read. + +The function will be called consecutively until all available entries are read. The function is called at least twice. + + * It is only called twice if: + + * the first call reads all entries and returns their number; + * the second call returns 0, having no other entries to read. + + * It is called more than twice if the first call does not return the total number of entries. + +The function performs the following operations: + + #. Iterates over the entries (the dentries) from the current directory. + #. For each dentry found, increments ``ctx->pos``. + #. For each valid dentry (an inode other than ``0``, for example), calls the :c:func:`dir_emit` function. + #. If the :c:func:`dir_emit` function returns zero (``false``), it means that the buffer in the user space is full and the function returns. 
+ +The arguments of the ``dir_emit`` function are: + + * ``ctx`` is the directory iteration context, passed as an argument to the ``iterate`` function; + * ``name`` is the name of the entry (a string of characters); + * ``name_len`` is the length of the entry name; + * ``ino`` is the inode number associated with the entry; + * ``type`` identifies the entry type: ``DT_REG`` (file), ``DT_DIR`` (directory), ``DT_UNKNOWN`` etc. ``DT_UNKNOWN`` can be used when the entry type is unknown. + +.. _BitmapOperations: + +Bitmap operations +================= + +When working with the file systems, management information (what block is free or busy, what inode is free or busy) is stored using bitmaps. +For this we often need to use bit operations. Such operations are: + + * searching for the first 0 bit: representing a free block or inode + * marking a bit as 1: marking a busy block or inode + +The bitmap operations are found in headers from ``include/asm-generic/bitops``, especially in ``find.h`` and ``atomic.h``. Usual functions, with names indicating their role, are: + + * :c:func:`find_first_zero_bit` + * :c:func:`find_first_bit` + * :c:func:`set_bit` + * :c:func:`clear_bit` + * :c:func:`test_and_set_bit` + * :c:func:`test_and_clear_bit` + +These functions usually receive the address of the bitmap, possibly its size (in bits) and, if necessary, the index of the bit that needs to be activated (set) or deactivated (clear). + +Some usage examples are listed below: + +.. code-block:: c + + unsigned int map; + unsigned char array_map[NUM_BYTES]; + size_t idx; + int changed; + + /* Find first zero bit in 32 bit integer. */ + idx = find_first_zero_bit(&map, 32); + printk (KERN_ALERT "The %zu-th bit is the first zero bit.\n", idx); + + /* Find first one bit in NUM_BYTES bytes array. */ + idx = find_first_bit(array_map, NUM_BYTES * 8); + printk (KERN_ALERT "The %zu-th bit is the first one bit.\n", idx); + + /* + * Clear the idx-th bit in integer. 
+ * It is assumed idx is less the number of bits in integer. + */ + clear_bit(idx, &map); + + /* + * Test and set the idx-th bit in array. + * It is assumed idx is less the number of bits in array. + */ + changed = __test_and_set_bit(idx, &sbi->imap); + if (changed) + printk(KERN_ALERT "%zu-th bit changed\n", idx); + +Further reading +=============== + +#. Robert Love -- Linux Kernel Development, Second Edition -- Chapter + 12. The Virtual Filesystem +#. Understanding the Linux Kernel, 3rd edition - Chapter 12. The Virtual + Filesystem +#. `Linux Virtual File System (presentation)`_ +#. `Understanding Unix/Linux Filesystem`_ +#. `Creating Linux virtual filesystems`_ +#. `The Linux Documentation Project - VFS`_ +#. `The "Virtual File System" in Linux`_ +#. `A Linux Filesystem Tutorial`_ +#. `The Linux Virtual File System`_ +#. `Documentation/filesystems/vfs.txt`_ +#. `File systems sources`_ + +.. _Linux Virtual File System (presentation): http://www.coda.cs.cmu.edu/doc/talks/linuxvfs/ +.. _Understanding Unix/Linux Filesystem: http://www.cyberciti.biz/tips/understanding-unixlinux-file-system-part-i.html +.. _Creating Linux virtual filesystems: http://lwn.net/Articles/57369/ +.. _The Linux Documentation Project - VFS: http://www.tldp.org/LDP/tlk/fs/filesystem.html +.. _The "Virtual File System" in Linux: http://www.linux.it/~rubini/docs/vfs/vfs.html +.. _A Linux Filesystem Tutorial: http://inglorion.net/documents/tutorials/tutorfs/ +.. _The Linux Virtual File System: http://www.win.tue.nl/~aeb/linux/lk/lk-8.html +.. _Documentation/filesystems/vfs.txt: http://lxr.free-electrons.com/source/Documentation/filesystems/vfs.txt +.. _File systems sources: http://lxr.free-electrons.com/source/fs/ + +Exercises +========= + +.. include:: exercises-summary.hrst +.. |LAB_NAME| replace:: filesystems + +.. important:: + + In this lab, we will continue the implementation of the file systems started in the previous one. 
+ For this, we will generate the laboratory skeleton using the following command: + + .. code-block:: console + + TODO=5 LABS=filesystems make skels + + After this, we will start the implementation from ``TODO 5``. + +myfs +---- + +For the exercises below, we will use the ``myfs`` file system whose implementation we started with the previous lab. +We stopped after mounting the file system and now we will continue with the operations for regular files and directories. +At the end of these exercises, we will be able to create, modify and delete regular directories and files. + +We will mostly use the ``inode`` and ``dentry`` VFS structures. +The ``inode`` structure defines a file (of any type: regular, directory, link), while the ``dentry`` structure defines a name, which is an entry in a directory. + +For this we will access the ``myfs/kernel`` directory in the lab skeleton. +The previously generated skeleton contains the solution for the previous lab; we will start from this. As in the previous lab, we will use the ``ramfs`` file system as a starting point. + +1. Directory operations +^^^^^^^^^^^^^^^^^^^^^^^ + +To begin with, we will implement the operations for working with directories. +The operations of creating a file or deleting a file are also directory operations; these operations result in adding or deleting a directory entry (*dentry*). + +At the end of this exercise we will be able to create and delete entries in the file system. We will not be able to read and write to regular files; we will do so in the next exercise. + +Follow directions marked with ``TODO 5`` which will guide you through the steps you need to take. 
+ +You will need to specify the following directory operations: + + * create a file (``create`` function) + * search (``lookup`` function) + * link (``link`` function) + * create directory (``mkdir`` function) + * deletion (``rmdir`` and ``unlink`` functions) + * create node (``mknod``) + * rename (``rename`` function) + +For this, define the ``myfs_dir_inode_operations`` structure in the code, where marked with ``TODO 5``. +To begin, just define the structure ``myfs_dir_inode_operations``; you will define the structures ``myfs_file_operations``, ``myfs_file_inode_operations`` , and ``myfs_aops`` in the next exercise. + +.. tip:: + + Read the section :ref:`DirectoryInodes` + + As a model, you are following the ``ramfs_dir_inode_operations`` structure. + +Implement the ``mkdir``, ``mknod`` and ``create`` operations inside ``myfs_mkdir``, ``myfs_mknod`` and ``myfs_create``. +These operations will allow you to create directories and files in the file system. + +.. tip:: + + We recommend making the code modular using a ``mknod`` function, which you can also use for the next exercise. + For inode reading and allocation, use ``myfs_get_inode``, which is already implemented. + + As a model, follow the next functions implemented in the ``ramfs`` file system: + + * :c:func:`ramfs_mknod` + * :c:func:`ramfs_mkdir` + * :c:func:`ramfs_create` + +For the other functions, use generic calls (``simple_*``) already defined in VFS. + +In the ``myfs_get_inode`` function, initialize the operations fields of the directory inodes: + + * ``i_op`` must be initialized to the address of the structure ``myfs_dir_inode_operations``; + * ``i_fop`` must be initialized to the address of the structure ``simple_dir_operations``, defined in VFS. + +.. note:: + + ``i_op`` is a pointer to a structure of type :c:type:`struct inode_operations` containing operations that have to do with the inode, which are, for a directory, creating a new entry, listing entries, deleting entries, etc. 
+ + ``i_fop`` is a pointer to a structure of type :c:type:`struct file_operations` containing operations that have to do with the ``file`` structure associated with the inode, such as ``read``, ``write``, and ``lseek``. + +Testing +""""""" + +Once the module is done, we can test the creation of files and directories. +To do this, we compile the kernel module (using ``make build``) and copy the resulting file (``myfs.ko``) and the test scripts (``test-myfs-{1,2}.sh``) in the virtual machine directory (using ``make copy``). + +.. note:: + + The test scripts are copied to the virtual machine using ``make copy`` only if they are executable: + + .. code-block:: console + + student@workstation:~/linux/tools/labs$ chmod +x skels/filesystems/myfs/test-myfs-*.sh + +After starting the virtual machine, insert the module, create the mount point and mount the file system: + +.. code-block:: console + + # insmod myfs.ko + # mkdir -p /mnt/myfs + # mount -t myfs none /mnt/myfs + +Now we can create file hierarchies and subdirectories in the mounted directory (``/mnt/myfs``). +We use commands like the ones below: + +.. code-block:: console + + # touch /mnt/myfs/peanuts.txt + # mkdir -p /mnt/myfs/mountain/forest + # touch /mnt/myfs/mountain/forest/tree.txt + # rm /mnt/myfs/mountain/forest/tree.txt + # rmdir /mnt/myfs/mountain/forest + +At this time we can not read or write files. When running commands such as the following ones we will get errors. + +.. code-block:: console + + # echo "chocolate" > /mnt/myfs/peanuts.txt + # cat /mnt/myfs/peanuts.txt + +This happens because we have not implemented the operations for working with files; we will do so further. + +To unload the kernel module, use the command + +.. code-block:: console + + rmmod myfs + +To test the functionality provided by the kernel module, we can use the dedicated script ``test-myfs-1.sh``. +If the implementation is correct, no error messages will be displayed. + +2. 
File operations +^^^^^^^^^^^^^^^^^^ + +We want to implement the operations for working with files, which are used for accessing a file's content: read, write, truncate, etc. +For this you will specify the operations described in the structures :c:type:`struct inode_operations`, :c:type:`struct file_operations` and :c:type:`struct address_space_operations`. + +Follow the locations marked with ``TODO 6`` which will guide you through the steps you need to take. + +Start by defining ``myfs_file_inode_operations`` and ``myfs_file_operations``. + +.. tip:: + + Read the section :ref:`FileOperations`. + + Use the generic functions provided by VFS. + + An example of implementation is the ``ramfs`` file system. + Follow the implementation of ``ramfs_file_inode_operations`` and ``ramfs_file_operations``. + +Inside the function ``myfs_get_inode``, initialize the operations fields for the regular file inodes: + + * ``i_op`` must be initialized to ``myfs_file_inode_operations``; + * ``i_fop`` must be initialized to ``myfs_file_operations``. + +Continue with defining the structure ``myfs_aops``. + +.. tip:: + + Read the section :ref:`AddressSpaceOperations`. + + Use the generic functions provided by VFS. + + An implementation example is the ``ramfs`` file system: the ``ramfs_aops`` structure. + + You do not need to define the function of type ``set_page_dirty``. + +Initialize the ``i_mapping->a_ops`` field of the inode structure to ``myfs_aops``. + +Testing +""""""" + +For testing, we use the steps described in the previous exercise. +In addition to those steps, we will now be able to read, write and modify a file using commands like the ones below: + +.. code-block:: console + + # echo "chocolate" > /mnt/myfs/peanuts.txt + # cat /mnt/myfs/peanuts.txt + +To test the functionality provided by the module, we can use the dedicated script: + +.. 
code-block:: console + + # ./test-myfs-2.sh + +If the implementation is correct, no error messages will be displayed when running the above script. + +minfs +----- + +For the exercises below, we will use the minfs file system whose development we started in the previous lab. +This is a file system with disk support. +We stopped after mounting the file system and now we will continue with the operations on regular files and directories. +At the end of these exercises we will be able to create and delete entries in the file system. + +We will mainly use the :c:type:`inode` and :c:type:`dentry` VFS structures. +The inode structure defines a file (of any type: regular, directory, link), while the dentry structure defines a name, which is a directory entry. + +For this we will access the ``minfs/kernel`` directory from the laboratory skeleton. +The generated skeleton contains the solution from the previous lab; we will start from this. +As in the previous lab, we will use the ``minix`` file system as a starting point. + +We will use the formatting tool ``mkfs.minfs`` in the ``minfs/user`` directory which is automatically compiled when running ``make build`` and copied to the virtual machine at ``make copy``. + +The formatting tool prepares a virtual machine disk using a command like + +.. code-block:: console + + # ./mkfs.minfs /dev/vdb + +After formatting, the disk has a structure like the one in the diagram below: + +.. image:: minfs_arch.png + +As shown in the diagram, ``minfs`` is a minimalist file system. +``minfs`` contains a maximum of 32 inodes, each inode having a single data block (the file size is limited to block size). +The super block contains a 32-bit map (``imap``), each bit indicating the use of an inode. + +.. note:: + + Before you start working, go through the ``minfs/kernel/minfs.h`` header file. + This file contains the structures and macros that will be used in these exercises. 
+ These structures and macros define the file system as described in the diagram above. + +1. Iterate operation +^^^^^^^^^^^^^^^^^^^^ + +At first we want to be able to list the contents of the root directory. +For this we must be able to read the entries in the root directory, which means implementing the ``iterate`` operation. +The ``iterate`` operation is a field within the ``minfs_dir_operations`` structure (of type ``file_operations``) and is implemented by the function ``minfs_readdir``. We need to implement this function. + +Follow directions marked with ``TODO 5`` which will guide you through the steps you need to take. + +.. tip:: + + Read the section :ref:`DirectoryInodes` + + As a starting point, follow the :c:func:`minix_readdir` function. + The function is rather complicated, but it gives you an insight into the steps you have to do. + + Follow, in ``minfs.c`` and ``minfs.h``, the definitions of structures ``struct minfs_inode_info``, ``struct minfs_inode`` and ``struct minfs_dir_entry``. + You will use them in the ``minfs_readdir`` implementation. + +Obtain the inode and the structure ``struct minfs_inode_info`` associated with the directory. +The structure ``struct minfs_inode_info`` is useful to find out the directory's data block. +From this structure you get the ``data_block`` field, representing the data block index on the disk. + +.. tip:: + + To get the structure ``struct minfs_inode_info`` structure, use :c:func:`list_entry` or :c:func:`container_of`. + +Use :c:func:`sb_bread` to read the directory data block. + +.. tip:: + + The data block of the directory is indicated by the ``data_block`` field of the structure ``struct minfs_inode_info`` corresponding to the directory. + + The data in the block is referenced by the ``b_data`` field of the ``buffer_head`` structure (the usual code will be ``bh->b_data``). 
+ This block (being the data block of a directory) contains an array of at most ``MINFS_NUM_ENTRIES`` entries of type ``struct minfs_dir_entry`` (directory entries specific to ``minfs``). + Use casting to ``struct minfs_dir_entry *`` to work with the data in the block. + +Iterate over all the entries in the data block and fill the user space buffer inside the ``for`` loop. + +.. tip:: + + For each index, get the corresponding entry of the ``struct minfs_dir_entry`` by using pointer arithmetic on the ``bh->b_data`` field. + Ignore dentries that have an ``ino`` field equal to 0. Such a dentry is a free slot in the directory's dentry list. + + For each valid entry, there is an existing call to :c:func:`dir_emit` with the appropriate parameters. This is the call that sends the dentries to the caller (and then to user space). + + Check the call examples in :c:func:`qnx6_readdir` and :c:func:`minix_readdir`. + +Testing +""""""" + +Once the module is done, we can test the listing of the root directory contents. +To do this, we compile the kernel module (``make build``) and copy the result to the virtual machine together with the test scripts (``minfs/user/test-minfs-{0,1}.sh``) and the formatting utility (``minfs/user/mkfs.minfs``) using ``make copy``, then start the machine. + +.. note:: + + The test scripts are copied to the virtual machine only if they are executable: + + .. code-block:: console + + student@eg106:~/src/linux/tools/labs$ chmod +x skels/filesystems/minfs/user/test-minfs*.sh + +After we start the virtual machine, we format the ``/dev/vdb`` disk, create the mount point and mount the file system: + +.. code-block:: console + + # ./mkfs.minfs /dev/vdb + # mkdir -p /mnt/minfs + # mount -t minfs /dev/vdb /mnt/minfs + +Now we can list the contents of the root directory: + +.. code-block:: console + + # ls -l /mnt/minfs + +We notice that there is already a file (``a.txt``); it is created by the formatting utility.
+ +We also notice that we are not allowed to display information for a file using the ``ls`` command. +This is because we have not implemented the ``lookup`` function. We will implement it in the next exercise. + +To test the functionality provided by the module, we can use the dedicated script: + +.. code-block:: console + + # ./test-minfs-0.sh + # ./test-minfs-1.sh + +2. Lookup operation +^^^^^^^^^^^^^^^^^^^ + +To properly list the contents of a directory, we need to implement the search functionality, i.e. the ``lookup`` operation. +The ``lookup`` operation is a field within the ``minfs_dir_inode_operations`` structure (of type ``inode_operations``) and is implemented by the ``minfs_lookup`` function. +This function (``minfs_lookup``) needs to be implemented. +We will actually implement the ``minfs_find_entry`` function called by ``minfs_lookup``. + +Follow directions marked with ``TODO 6`` which will tell you the steps you need to take. + +.. tip:: + + Read the section :ref:`DirectoryInodes` + + As a starting point, read the functions :c:func:`qnx6_find_entry` and :c:func:`minix_find_entry`. + +In the ``minfs_find_entry`` function, iterate over the directory where the dentry is: ``dentry->d_parent->d_inode``. +Iterating means going through the entries in the directory's data block (of type ``struct minfs_dir_entry``) and locating, if it exists, the requested entry. + +.. tip:: + + From the structure of type ``struct minfs_inode_info`` corresponding to the directory, find out the data block index and read it (``sb_bread``). + You will access the block contents using ``bh->b_data``. + The directory data block contains an array of at most ``MINFS_NUM_ENTRIES`` entries of type ``struct minfs_dir_entry``. + Use pointer arithmetic to get entries of type ``struct minfs_dir_entry`` from the data block (``bh->b_data``).
+ + Check the presence of the name (stored in the local variable ``name``) in the directory (if there is an entry in the data block whose name is a string equal to the given name). Use :c:func:`strcmp` to verify. + + Ignore dentries that have an ``ino`` field equal to ``0``. Those dentries are free slots in the directory dentry list. + + Store in the ``final_de`` variable the dentry found. + If you do not find any dentry, then the ``final_de`` variable will have the value ``NULL``, the value with which it was initialized. + +Comment the ``simple_lookup`` call in the ``minfs_lookup`` function to invoke the implementation of ``minfs_readdir``. + +Testing +""""""" + +For testing, we use the steps described in the previous exercise. +The long file listing (``ls -l``) of the contents of a directory (root directory) will display permissions and other file-specific information: + +.. code-block:: console + + # ls -l /mnt/minfs + +To test the functionality provided by the module, we can use the dedicated scripts: + +.. code-block:: console + + # ./test-minfs-0.sh + # ./test-minfs-1.sh + +If the implementation is correct, no error messages will be displayed when running the scripts above. + +.. note:: + + After mounting the file system using the command + + .. code-block:: console + + # mount -t minfs /dev/vdb /mnt/minfs + + we try to create a file using the command + + .. code-block:: console + + # touch /mnt/minfs/peanuts.txt + + We notice that we get an error because we did not implement the directory operations that allow us to create a file. + We will do this for the next exercise. + +3. Create operation +^^^^^^^^^^^^^^^^^^^ + +In order to allow the creation of a file in a directory, we must implement the ``create`` operation. +The ``create`` operation is a field in the ``minfs_dir_inode_operations`` structure (of type :c:type:`inode_operations`) and is implemented by the ``minfs_create`` function. We need to implement this function. 
+In fact, we will implement the ``minfs_new_inode`` function (which creates and initializes an inode) and the ``minfs_add_link`` function, which adds a link (or name or *dentry*) for the created inode. + +Follow directions marked with ``TODO 7`` which will guide you through the steps you need to take. + +.. tip:: + + Read the section :ref:`DirectoryInodes` + + Inspect the code in the ``minfs_create`` function and the skeleton of functions ``minfs_new_inode`` and ``minfs_add_link``. + +Implement the function ``minfs_new_inode``. Inside this function you will create (using :c:func:`new_inode`) and initialize an inode. The initialization is done using the data from disk. + +.. tip:: + + Use the :c:func:`minix_new_inode` function as a model. + Find the first free inode in imap (``sbi->imap``). + Use bitwise operations (``find_first_zero_bit`` and ``set_bit``). + Read the :ref:`BitmapOperations` section. + + The buffer for the superblock (``sbi->sbh``) must be marked as dirty. + + You must initialize the usual fields as it is done for the ``myfs`` file system. + Initialize the ``i_mode`` field to ``0`` in the call to ``inode_init_owner``. It will be initialized in the caller later. + +Implement the ``minfs_add_link`` function. The function adds a new dentry (``struct minfs_dir_entry``) to the parent directory data block (``dentry->d_parent->d_inode``). + +.. tip:: + + Use the ``minix_add_link`` function as a model. + +In ``minfs_add_link`` we want to find the first free place for the dentry. +For this, you will iterate over the directory data block and you will find the first free entry. A free dentry has the ``ino`` field equal to ``0``. + +.. tip:: + + In order to work with the directory, get the inode of type ``struct minfs_inode_info`` corresponding to the parent directory (the **dir** inode). + Do not use the variable ``inode`` to get ``struct minfs_inode_info``; that inode belongs to the file, not to the parent directory inside which you want to add the link/dentry.
+ To get the ``struct minfs_inode_info`` structure, use :c:func:`container_of`. + + The structure ``struct minfs_inode_info`` is useful for finding the directory data block (the one indicated by the ``dentry->d_parent->d_inode``, which is the ``dir`` variable). + From this structure, get the ``data_block`` field, representing the index of the data block on the disk. + This block contains the entries in the directory. Use :c:func:`sb_bread` to read the block and then ``bh->b_data`` to refer to the data. + The block contains at most ``MINFS_NUM_ENTRIES`` entries of type ``struct minfs_dir_entry``. + + If all entries are occupied, return ``-ENOSPC``. + + Iterate over the entries in the data block using the variable ``de`` and extract the first free entry (for which the ``ino`` field is ``0``). + + When you have found a free place, fill in the corresponding entry: + + * the ``ino`` field in ``de->ino`` + * the ``dentry->d_name.name`` field in ``de->name`` + + Then mark the buffer dirty. + + +Testing +""""""" + +For testing, we use the steps described in the previous exercise. +Now we can create files within the file system: + +.. code-block:: console + + # touch /mnt/minfs/peanuts.txt + +To test the functionality provided by the module, we can use the dedicated script: + +.. code-block:: console + + # ./test-minfs-2.sh + +If the implementation is correct, no error messages will be displayed when running the above script. + +.. note:: + + The current implementation of the ``minfs`` file system is not definitive. + To be complete, the implementation needs functions to delete files, create and delete directories, rename entries, and modify the contents of a file.
+ diff --git a/Documentation/teaching/labs/infrastructure.rst b/Documentation/teaching/labs/infrastructure.rst new file mode 100644 index 00000000000000..b3b4873c01e114 --- /dev/null +++ b/Documentation/teaching/labs/infrastructure.rst @@ -0,0 +1,82 @@ +Infrastructure +============== + +In order to facilitate learning each topic has a hands-on exercises +section which will contain in-depth, incremental clues on how to solve +one or multiple tasks. To focus on a particular issue most of the +tasks will be performed on existing skeleton drivers. Each skeleton +driver has clearly marked sections that need to be filled in order to +complete the tasks. + +The skeleton drivers are generated from full source examples located +in tools/labs/templates. To solve tasks you start by generating the +skeleton drivers, running the **skels** target in *tools/labs*. To +keep the workspace clean it is recommended to generate the skeletons +for one lab only and clean the workspace before starting work on a new +lab. Labs can be selected by using the **LABS** variable: + +.. code-block:: shell + + tools/labs $ make clean + tools/labs $ LABS=kernel_modules make skels + + tools/labs $ ls skels/kernel_modules/ + 1-2-test-mod 3-error-mod 4-multi-mod 5-oops-mod 6-cmd-mod \ + 7-list-proc 8-kprobes 9-kdb + +You can also use the same variable to generate skeletons for specific +tasks: + +.. code-block:: shell + + tools/labs $ LABS="kernel_modules/6-cmd-mod kernel_modules/8-kprobes" make skels + + tools/labs$ ls skels/kernel_modules + 6-cmd-mod 8-kprobes + + +For each task you may have multiple steps to perform, usually +incremental. These steps are marked in the source code as well as in +the lab exercises with the keyword *TODO*. If we have multiple steps +to perform they will be suffixed with a number, like *TODO1*, *TODO2*, +etc. If no number is used it is assumed to be the one and only +step. If you want to resume a task from a certain step, you can do so using +the **TODO** variable.
The following example will generate the +skeleton with the first *TODO* step resolved: + +.. code-block:: shell + + tools/labs $ TODO=2 LABS="kernel_modules/8-kprobes" make skels + +Once the skeleton drivers are generated you can build them with the +**build** make target: + +.. code-block:: shell + + tools/labs $ make build + echo "# autogenerated, do not edit " > skels/Kbuild + for i in ./kernel_modules/8-kprobes; do echo "obj-m += $i/" >> skels/Kbuild; done + make -C /home/tavi/src/linux M=/home/tavi/src/linux/tools/labs/skels ARCH=x86 modules + make[1]: Entering directory '/home/tavi/src/linux' + CC [M] /home/tavi/src/linux/tools/labs/skels/./kernel_modules/8-kprobes/kprobes.o + Building modules, stage 2. + MODPOST 1 modules + CC /home/tavi/src/linux/tools/labs/skels/./kernel_modules/8-kprobes/kprobes.mod.o + LD [M] /home/tavi/src/linux/tools/labs/skels/./kernel_modules/8-kprobes/kprobes.ko + make[1]: Leaving directory '/home/tavi/src/linux' + + +To copy the drivers to the VM you can either use ssh or update the +VM image directly using the **copy** target: + +.. code-block:: shell + + tools/labs $ make copy + ... + 'skels/kernel_modules/8-kprobes/kprobes.ko' -> '/tmp/tmp.4UMKcISmQM/home/root/skels/kernel_modules/8-kprobes/kprobes.ko' + +.. attention:: The **copy** target will fail if the VM is + running. This is intentional so that we avoid corrupting the + filesystem.
+ + diff --git a/Documentation/teaching/labs/interrupts.rst b/Documentation/teaching/labs/interrupts.rst new file mode 100644 index 00000000000000..d8d8cab1b75b89 --- /dev/null +++ b/Documentation/teaching/labs/interrupts.rst @@ -0,0 +1,1118 @@ +========================== +I/O access and Interrupts +========================== + +Lab objectives +============== + +* communication with peripheral devices +* implement interrupt handlers +* synchronizing interrupts with process context + +Keywords: IRQ, I/O port, I/O address, base address, UART, request_region, release_region, inb, outb + +Background information +====================== + +A peripheral device is controlled by writing and reading its +registers. Often, a device has multiple registers that can be accessed +at consecutive addresses either in the memory address space or in the +I/O address space. Each device connected to the I/O bus has a set of +I/O addresses, called I/O ports. I/O ports can be mapped to physical +memory addresses so that the processor can communicate with the device +through instructions that work directly with the memory. For +simplicity we will directly use I/O ports (without mapping to physical +memory addresses) to communicate with physical devices. + +The I/O ports of each device are structured into a set of specialized +registers to provide a uniform programming interface. Thus, most +devices will have the following types of registers: + +* **Control** registers that receive device commands +* **Status** registers, which contain information about the device's + internal status +* **Input** registers from which data is taken from the device +* **Output** registers in which the data is written to transmit it to the + device + +Physical ports are differentiated by the number of bits: they can be +8, 16 or 32-bit ports. + +For example, the parallel port has 8 8-bit I/O ports starting at base +address 0x378. 
The data register is found at base address (0x378), the status +register at base + 1 (0x379), and the control register at base address + 2 +(0x37a). The data register is both an input and an output register. + +Although there are devices that can be fully controlled using I/O +ports or special memory areas, there are situations where this is +insufficient. The main problem that needs to be addressed is that +certain events occur at undefined moments in time and it is +inefficient for the processor (CPU) to interrogate the status of the +device repeatedly (polling). The way to solve this problem is using an +Interrupt ReQuest (IRQ) which is a hardware notification by which the +processor is notified that a particular external event happened. + +For IRQs to be useful device drivers must implement handlers, i.e. a +particular sequence of code that handles the interrupt. Because in +many situations the number of interrupts available is limited, a +device driver must behave in an orderly fashion with interrupts: +interrupts must be requested before being used and released when they +are no longer needed. In addition, in some situations, device drivers +must share an interrupt or synchronize with interrupts. All of these will be +discussed further. + +When we need to access shared resources between an interrupt +routine (A) and code running in process context or in bottom-half +context (B), we must use a special synchronization technique. In (A) +we need to use a spinlock primitive, and in (B) we must disable +interrupts AND use a spinlock primitive. Disabling interrupts is not +enough because the interrupt routine can run on a processor other than +the one running (B). + +Using only a spinlock can lead to a deadlock. The classic example of +deadlock in this case is: + +1. We run a process on the X processor, and we acquire the lock +2. Before releasing the lock, an interrupt is generated on the X processor +3.
The interrupt handling routine will try to acquire the lock and it + will go into an infinite loop + + +Accessing the hardware +====================== + +In Linux, the I/O ports access is implemented on all architectures and +there are several APIs that can be used. + +Request access to I/O ports +--------------------------- + +Before accessing I/O ports we first must request access to them, to +make sure there is only one user. In order to do so, one must use the +:c:func:`request_region` function: + +.. code-block:: c + + #include <linux/ioport.h> + + struct resource *request_region(unsigned long first, unsigned long n, + const char *name); + +To release a reserved region one must use the :c:func:`release_region` function: + +.. code-block:: c + + void release_region(unsigned long start, unsigned long n); + + +For example, the serial port COM1 has the base address 0x3F8 and it +has 8 ports and this is a code snippet of how to request access to +these ports: + +.. code-block:: c + + #include <linux/ioport.h> + + #define MY_BASEPORT 0x3F8 + #define MY_NR_PORTS 8 + + if (!request_region(MY_BASEPORT, MY_NR_PORTS, "com1")) { + /* handle error */ + return -ENODEV; + } + +To release the ports one would use something like: + +.. code-block:: c + + release_region(MY_BASEPORT, MY_NR_PORTS); + +Most of the time, port requests are done at the driver initialization +or probe time and the port releasing is done at the removal of the +device or module. + +All of the port requests can be seen from userspace via the +:file:`/proc/ioports` file: + +.. code-block:: shell + + $ cat /proc/ioports + 0000-001f : dma1 + 0020-0021 : pic1 + 0040-005f : timer + 0060-006f : keyboard + 0070-0077 : rtc + 0080-008f : dma page reg + 00a0-00a1 : pic2 + 00c0-00df : dma2 + 00f0-00ff : fpu + 0170-0177 : ide1 + 01f0-01f7 : ide0 + 0376-0376 : ide1 + 0378-037a : parport0 + 037b-037f : parport0 + 03c0-03df : vga+ + 03f6-03f6 : ide0 + 03f8-03ff : serial + ...
+ + +Accessing I/O ports +------------------- + +After a driver has obtained the desired I/O port range, one can +perform read or write operations on these ports. Since physical ports +are differentiated by the number of bits (8, 16, or 32 bits), there +are different port access functions depending on their size. The +following port access functions are defined in asm/io.h: + + +* *unsigned inb(int port)*, reads one byte (8 bits) from port +* *void outb(unsigned char byte, int port)*, writes one byte (8 bits) to port +* *unsigned inw(int port)*, reads two bytes (16 bits) from port +* *void outw(unsigned short word, int port)*, writes two bytes (16 bits) to port +* *unsigned inl (int port)*, reads four bytes (32 bits) from port +* *void outl(unsigned long word, int port)*, writes four bytes (32 bits) to port + +The port argument specifies the address of the port where the reads or +writes are done, and its type is platform dependent (may be unsigned +long or unsigned short). + +Some devices may have problems when the processor is trying to +transfer data too fast to and from the device. To avoid this issue we +may need to insert a delay after an I/O operation and there are functions +you can use that introduce this delay. Their names are similar to +those described above, except that they end in _p: inb_p, +outb_p, etc. + +For example, the following sequence writes a byte on COM1 serial port +and then reads it: + +.. code-block:: c + + #include <asm/io.h> + #define MY_BASEPORT 0x3F8 + + unsigned char value = 0xFF; + outb(value, MY_BASEPORT); + value = inb(MY_BASEPORT); + +5. Accessing I/O ports from userspace +------------------------------------- + +Although the functions described above are defined for device drivers, +they can also be used in user space by including the <sys/io.h> +header. In order to be used, ioperm or iopl must first be called to +get permission to perform port operations.
The ioperm function obtains +permission for individual ports, while iopl obtains it for the entire I/O address +space. To use these features, the user must be root. + +The following sequence used in user space gets permission for the +first 3 ports of the serial port, and then releases them: + +.. code-block:: c + + #include <sys/io.h> + #define MY_BASEPORT 0x3F8 + + if (ioperm(MY_BASEPORT, 3, 1)) { + /* handle error */ + } + + if (ioperm(MY_BASEPORT, 3, 0)) { + /* handle error */ + } + +The third parameter of the ioperm function is used to request or +release port permission: 1 to get permission and 0 to release. + +Interrupt handling +================== + +Requesting an interrupt +----------------------- + +As with other resources, a driver must gain access to an interrupt +line before it can use it and release it at the end of the execution. + +In Linux, the request to obtain and release an interrupt is done using +the :c:func:`request_irq` and :c:func:`free_irq` functions: + +.. code-block:: c + + #include <linux/interrupt.h> + + typedef irqreturn_t (*irq_handler_t)(int, void *); + + int request_irq(unsigned int irq_no, irq_handler_t handler, + unsigned long flags, const char *dev_name, void *dev_id); + + void free_irq(unsigned int irq_no, void *dev_id); + +Note that to get an interrupt, the developer calls +:c:func:`request_irq`. When calling this function you must specify the +interrupt number (*irq_no*), a handler that will be called when the +interrupt is generated (*handler*), flags that will instruct the +kernel about the desired behaviour (*flags*), the name of the device +using this interrupt (*dev_name*), and a pointer that can be +configured by the user at any value, and that has no global +significance (*dev_id*). Most of the time, *dev_id* will be a +pointer to the device driver's private data. When the interrupt is +released, using the :c:func:`free_irq` function, the developer must +send the same pointer value (*dev_id*) along with the same interrupt +number (*irq_no*).
The device name (*dev_name*) is used to display +statistics in */proc/interrupts*. + +The value that :c:func:`request_irq` returns is 0 if the entry was +successful or a negative error code indicating the reason for the +failure. A typical value is *-EBUSY* which means that the interrupt +was already requested by another device driver. + +The *handler* function is executed in interrupt context which means +that we can't call blocking APIs such as :c:func:`mutex_lock` or +:c:func:`msleep`. We must also avoid doing a lot of work in the +interrupt handler and instead use deferred work if needed. The actions +performed in the interrupt handler include reading the device +registers to get the status of the device and acknowledge the +interrupt, operations that most of the time can be performed with +non-blocking calls. + +There are situations where although a device uses interrupts we can't +read the device's registers in a non-blocking mode (for example a +sensor connected to an I2C or SPI bus whose driver does not guarantee +that bus read / write operations are non-blocking ). In this +situation, in the interruption, we must plan a work-in-process action +(work queue, kernel thread) to access the device's registers. Because +such a situation is relatively common, the kernel provides the +:c:func:`request_threaded_irq` function to write interrupt handling +routines running in two phases: a process-phase and an interrupt +context phase: + +.. code-block:: c + + #include + + int request_threaded_irq(unsigned int irq, irq_handler_t handler, + irq_handler_t thread_fn, + unsigned long flags, const char *name, void *dev); + +*handler* is the function running in interrupt context, and will +implement critical operations while the thread_fn function runs in +process context and implements the rest of the operations. 
+ +The flags that can be passed when requesting an interrupt are: + +* *IRQF_SHARED* tells the kernel that the interrupt can be + shared with other devices. If this flag is not set, then if there is + already a handler associated with the requested interrupt, the + request for interrupt will fail. A shared interrupt is handled in a + special way by the kernel: all of the associated interrupt handlers + will be executed until the device that generated the interrupt is + identified. But how can a device driver know if the interrupt + handling routine was activated by an interrupt generated by the + device it manages? Virtually all devices that offer interrupt + support have a status register that can be interrogated in the + handling routine to see if the interrupt was or was not generated by + the device (for example, in the case of the 8250 serial port, this + status register is IIR - Interrupt Identification Register). When + requesting a shared interrupt, the dev_id argument must be unique + and it must not be NULL. Usually it is set to the module's private + data. + +* *IRQF_ONESHOT* interrupt will be reactivated after running the process + context routine; without this flag, the interrupt will be + reactivated after running the handler routine in the context of + the interrupt + + +Requesting the interrupt can be done either at the initialization of +the driver (:c:func:`init_module`), when the device is probed, or when +the device is used (e.g. during *open*). + +The following example performs the interrupt request for the COM1 +serial port: + +.. code-block:: c + + #include <linux/interrupt.h> + + #define MY_BASEPORT 0x3F8 + #define MY_IRQ 4 + + static int my_init(void) + { + [...] + struct my_device_data *my_data; + int err; + + err = request_irq(MY_IRQ, my_handler, IRQF_SHARED, + "com1", my_data); + if (err < 0) { + /* handle error*/ + return err; + } + [...] + } + +As you can see, the IRQ for serial port COM1 is 4, which is used in +shared mode (IRQF_SHARED). + +..
attention:: When requesting a shared interrupt (IRQF_SHARED) the + *dev_id* argument can not be NULL. + +To release the interrupt associated with the serial port, the +following operations will be executed: + +.. code-block:: c + + free_irq (MY_IRQ, my_data); + + +During the initialization function (:c:func:`init_module`), or in the +function that opens the device, interrupts must be activated for the +device. This operation is dependent on the device, but most often +involves setting a bit from the control register. + + +As an example, for the 8250 serial port, the following operations must +be performed to enable interrupts: + +.. code-block:: c + + #include <asm/io.h> + #define MY_BASEPORT 0x3F8 + + outb(0x08, MY_BASEPORT+4); + outb(0x01, MY_BASEPORT+1); + + +In the above example, two operations are performed: + +1. All interrupts are enabled by setting bit 3 (Aux Output 2) in + the MCR register - Modem Control Register +2. The RDAI (Received Data Available Interrupt) is activated + by setting the appropriate bit in the IER - Interrupt Enable + Register. + + +Implementing an interrupt handler +--------------------------------- + +Let's take a look at the signature of the interrupt handler function: + +.. code-block:: c + + irqreturn_t (*handler)(int irq_no, void *dev_id); + +The function receives as parameters the number of the interrupt +(*irq_no*) and the pointer sent to :c:func:`request_irq` when the +interrupt was requested. The interrupt handling routine must return a +value with a type of :c:type:`typedef irqreturn_t`. For the current kernel +version, there are three valid values: *IRQ_NONE*, *IRQ_HANDLED*, +and *IRQ_WAKE_THREAD*. The device driver must return *IRQ_NONE* if +it notices that the interrupt has not been generated by the device it +is in charge of.
Otherwise, the device driver must return *IRQ_HANDLED* +if the interrupt can be handled directly from the interrupt context or +*IRQ_WAKE_THREAD* to schedule the running of the process context +processing function. + +The skeleton for an interrupt handler is: + +.. code-block:: c + + irqreturn_t my_handler(int irq_no, void *dev_id) + { + struct my_device_data *my_data = (struct my_device_data *) dev_id; + + /* if interrupt is not for this device (shared interrupts) */ + /* return IRQ_NONE;*/ + + /* clear interrupt-pending bit */ + /* read from device or write to device*/ + + return IRQ_HANDLED; + } + + +Typically, the first thing executed in the interrupt handler is to +determine whether the interrupt was generated by the device that the +driver ordered. This usually reads information from the device's +registers to indicate whether the device has generated an +interrupt. The second thing is to reset the interrupt pending bit on +the physical device as most devices will no longer generate +interruptions until this bit has been reset (e.g. for the 8250 +serial port bit 0 in the IIR register must be cleared). + + +Locking +------- + +Because the interrupt handlers run in interrupt context the actions +that can be performed are limited: unable to access user space memory, +can't call blocking functions. Also synchronization using spinlocks is +tricky and can lead to deadlocks if the spinlock used is already +acquired by a process that has been interrupted by the running +handler. + +However, there are cases where device drivers have to synchronize +using interrupts, such as when data is shared between the interrupt +handler and process context or bottom-half handlers. In these +situations it is necessary to both deactivate the interrupt and use +spinlocks. + +There are two ways to disable interrupts: disabling all interrupts, at +the processor level, or disabling a particular interrupt at the device +or interrupt controller level. 
Processor disabling is faster and is +therefore preferred. For this purpose, there are locking functions +that disable and enable interrupts while acquiring and releasing a spinlock at +the same time: :c:func:`spin_lock_irqsave`, +:c:func:`spin_unlock_irqrestore`, :c:func:`spin_lock_irq`, and +:c:func:`spin_unlock_irq`: + +.. code-block:: c + + #include <linux/spinlock.h> + + void spin_lock_irqsave (spinlock_t * lock, unsigned long flags); + void spin_unlock_irqrestore (spinlock_t * lock, unsigned long flags); + + void spin_lock_irq (spinlock_t * lock); + void spin_unlock_irq (spinlock_t * lock); + +The :c:func:`spin_lock_irqsave` function disables interrupts for the +local processor before it obtains the spinlock; the previous state of +the interrupts is saved in *flags*. + +If you are absolutely sure that the interrupts on the current +processor have not already been disabled by someone else and you are +sure you can activate the interrupts when you release the spinlock, +you can use :c:func:`spin_lock_irq`. + +For read / write spinlocks there are similar functions available: + +* :c:func:`read_lock_irqsave` +* :c:func:`read_unlock_irqrestore` +* :c:func:`read_lock_irq` +* :c:func:`read_unlock_irq` +* :c:func:`write_lock_irqsave` +* :c:func:`write_unlock_irqrestore` +* :c:func:`write_lock_irq` +* :c:func:`write_unlock_irq` + +If we want to disable interrupts at the interrupt controller level +(not recommended because disabling a particular interrupt is slower and +we can not disable shared interrupts) we can do this with +:c:func:`disable_irq`, :c:func:`disable_irq_nosync`, and +:c:func:`enable_irq`. Using these functions will disable the interrupts on +all processors. Calls can be nested: if disable_irq is called twice, +it will require as many calls to enable_irq to enable it. The difference +between disable_irq and disable_irq_nosync is that the first one will +wait for the executed handlers to finish.
Because of this, +:c:func:`disable_irq_nosync` is generally faster, but may lead to +races with the interrupt handler, so when not sure use +:c:func:`disable_irq`. + +The following sequence disables and then enables the interrupt for +the COM1 serial port: + +.. code-block:: c + + #define MY_IRQ 4 + + disable_irq (MY_IRQ); + enable_irq (MY_IRQ); + +It is also possible to disable interrupts at the device level. This +approach is also slower than disabling interrupts at the processor +level but it works with shared interrupts. The way to accomplish this +is device specific and it usually means we have to clear a bit from +one of the control registers. + +It is also possible to disable all interrupts for the current +processor independent of taking locks. Disabling all interrupts by +device drivers for synchronization purposes is inappropriate because +races are still possible if the interrupt is handled on another +CPU. For reference, the functions that disable / enable interrupts on +the local processor are :c:func:`local_irq_disable` and +:c:func:`local_irq_enable`. + +In order to use a resource shared between process context and the +interrupt handling routine, the functions described above will be used +as follows: + +.. code-block:: c + + static spinlock_t lock; + + /* IRQ handling routine: interrupt context */ + irqreturn_t kbd_interrupt_handle(int irq_no, void * dev_id) + { + ... + spin_lock(&lock); + /* Critical region - access shared resource */ + spin_unlock (&lock); + ... + } + + /* Process context: Disable interrupts when locking */ + static void my_access(void) + { + unsigned long flags; + + spin_lock_irqsave(&lock, flags); + /* Critical region - access shared resource */ + spin_unlock_irqrestore(&lock, flags); + + ... + } + + void my_init (void) + { + ... + spin_lock_init (&lock); + ... + } + + +The *my_access* function above runs in process context.
To +synchronize access to the shared data, we disable the interrupts and +use the spinlock *lock*, i.e. the :c:func:`spin_lock_irqsave` and +:c:func:`spin_unlock_irqrestore` functions. + +In the interrupt handling routine, we use the :c:func:`spin_lock` and +:c:func:`spin_unlock` functions to access the shared resource. + +.. note:: The *flags* argument for :c:func:`spin_lock_irqsave` and + :c:func:`spin_unlock_irqrestore` is a value and not a pointer but keep + in mind that :c:func:`spin_lock_irqsave` changes the value of + *flags*, since this is actually a macro. + +Interrupt statistics +-------------------- + +Information and statistics about system interrupts can be found in +*/proc/interrupts* or */proc/stat*. Only system interrupts with +associated interrupt handlers appear in */proc/interrupts*: + +.. code-block:: shell + + # cat /proc/interrupts + CPU0 + 0: 7514294 IO-APIC-edge timer + 1: 4528 IO-APIC-edge i8042 + 6: 2 IO-APIC-edge floppy + 8: 1 IO-APIC-edge rtc + 9: 0 IO-APIC-level acpi + 12: 2301 IO-APIC-edge i8042 + 15: 41 IO-APIC-edge ide1 + 16: 3230 IO-APIC-level ioc0 + 17: 1016 IO-APIC-level vmxnet ether + NMI: 0 + LOC: 7229438 + ERR: 0 + MIS: 0 + +The first column specifies the IRQ associated with the interrupt. The +following column shows the number of interrupts that were generated +for each processor in the system; the last two columns provide +information about the interrupt controller and the device name that +registered the handler for that interrupt. + +The */proc/stat* file provides information about system activity, +including the number of interrupts generated since the last (re)boot +of the system: + +..
code-block:: shell + + # cat /proc/stat | grep in + intr 7765626 7754228 4620 0 0 0 0 2 0 1 0 0 0 2377 0 0 41 3259 1098 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +Each line in the */proc/state* file begins with a keyword that +specifies the meaning of the information on the line. For information +on interrupts, this keyword is intr. The first number on the line +represents the total number of interrupts, and the other numbers +represent the number of interrupts for each IRQ, starting at 0. The +counter includes the number of interrupts for all processors in the +system. + + +Further reading +=============== + +Serial Port +----------- + +* `Serial Port `_ +* `Interfacing the Serial / RS232 Port `_ + + +Parallel port +------------- + +* `Interfacing the Standard Parallel Port `_ +* `Parallel Port Central `_ + +Keyboard controller +------------------- + +* `Intel 8042 `_ +* drivers/input/serio/i8042.c +* drivers/input/keyboard/atkbd.c + +Linux device drivers +-------------------- + +* `Linux Device Drivers, 3rd ed., Ch. 9 - Communicating with Hardware `_ +* `Linux Device Drivers, 3rd ed., Ch. 10 - Interrupt Handling `_ +* `Interrupt Handlers `_ + + +Exercises +========= + +.. include:: exercises-summary.hrst +.. |LAB_NAME| replace:: interrupts + +0. Intro +-------- + +Using |LXR|_, find the definitions of the following symbols in the Linux kernel: + +* :c:type:`struct resource` +* :c:func:`request_region` and :c:func:`__request_region` +* :c:func:`request_irq` and :c:func:`request_threaded_irq` +* :c:func:`inb` for the x86 architecture. 
+ +Analyze the following Linux code: + +* Keyboard initialization function :c:func:`i8042_setup_kbd` +* The AT or PS/2 keyboard interrupt function :c:func:`atkbd_interrupt` + +Keyboard driver +------------------ + +The next exercise's objective is to create a driver that uses the +keyboard IRQ, inspects the incoming key codes and stores them in a +buffer. The buffer will be accessible from userspace via a character +device driver. + +1. Request the I/O ports +------------------------ + +To start with, we aim to allocate memory in the I/O space for hardware +devices. We will see that we cannot allocate space for the keyboard +because the designated region is already allocated. Then we will allocate +I/O space for unused ports. + +The *kbd.c* file contains a skeleton for the keyboard driver. Browse +the source code and inspect :c:func:`kbd_init`. Notice that the I/O +ports we need are I8042_STATUS_REG and I8042_DATA_REG. + +Follow the sections marked with **TODO 1** in the skeleton. Request the I/O +ports in :c:func:`kbd_init` and make sure to check for errors and to properly +clean up in case of errors. Also, add code to release the I/O ports in +:c:func:`kbd_exit`. + +.. note:: You can review the `Request access to I/O ports`_ section before + proceeding. + +Now build the module and copy it to the VM image: + +.. code-block:: shell + + tools/labs $ make build + tools/labs $ make copy + + +Now start the VM and insert the module: + +.. code-block:: shell + + root@qemux86:~# insmod skels/interrupts/kbd.ko + kbd: loading out-of-tree module taints kernel. + insmod: can't insert 'skels/interrupts/kbd.ko': Device or resource busy + +Notice that you get an error when trying to request the I/O +ports. This is because we already have a driver that has requested the +I/O ports. To validate this, check the :file:`/proc/ioports` file for the +``STATUS_REG`` and ``DATA_REG`` values: + +..
code-block:: shell + + root@qemux86:~# cat /proc/ioports | egrep "(0060|0064)" + 0060-0060 : keyboard + 0064-0064 : keyboard + + +Let's find out which driver registers these ports and try to remove the +module associated with it. + +.. code-block:: shell + + $ find -name \*.c | xargs grep \"keyboard\" + + find -name \*.c | xargs grep \"keyboard\" | egrep '(0x60|0x64)' + ... + ./arch/x86/kernel/setup.c:{ .name = "keyboard", .start = 0x60, .end = 0x60, + ./arch/x86/kernel/setup.c:{ .name = "keyboard", .start = 0x64, .end = 0x64 + +It looks like the I/O ports are registered by the kernel during +boot and we won't be able to remove the associated module. Instead +let's trick the kernel and register ports 0x61 and 0x65. + +Use the function :c:func:`request_region` (inside the :c:func:`kbd_init` +function) to allocate the ports and the function :c:func:`release_region` +(inside the :c:func:`kbd_exit` function) to release the allocated ports. + +This time we can load the module and */proc/ioports* shows that the +owner of these ports is our module: + +.. code-block:: shell + + root@qemux86:~# insmod skels/interrupts/kbd.ko + kbd: loading out-of-tree module taints kernel. + Driver kbd loaded + root@qemux86:~# cat /proc/ioports | grep kbd + 0061-0061 : kbd + 0065-0065 : kbd + +Let's remove the module and check that the I/O ports are released: + +.. code-block:: shell + + root@qemux86:~# rmmod kbd + Driver kbd unloaded + root@qemux86:~# cat /proc/ioports | grep kbd + root@qemux86:~# + +2. Interrupt handling routine +----------------------------- + +For this task we will implement and register an interrupt handler for +the keyboard interrupt. You can review the `Requesting an interrupt`_ +section before proceeding. + +Follow the sections marked with **TODO 2** in the skeleton. + +First, define an empty interrupt handling routine. + +.. note:: Since we already have a driver that uses this interrupt we + should report the interrupt as not handled (i.e.
return + :c:type:`IRQ_NONE`) so that the original driver still has a + chance to process it. + +Then register the interrupt handler routine using +:c:func:`request_irq`. The interrupt number is defined by the +``I8042_KBD_IRQ`` macro. The interrupt handling routine must be +requested with :c:type:`IRQF_SHARED` to share the interrupt line with +the keyboard driver (i8042). + +.. note:: For shared interrupts, *dev_id* can not be NULL. Use + ``&devs[0]``, that is a pointer to :c:type:`struct kbd`. This + structure contains all the information needed for device + management. To see the interrupt in */proc/interrupts*, do + not use NULL for *dev_name*. You can use the MODULE_NAME + macro. + + If requesting the interrupt fails make sure to properly + clean up by jumping to the right label, in this case the one + that releases the I/O ports and continues with unregistering + the character device driver. + +Compile, copy and load the module into the kernel. Check that the interrupt +line has been registered by looking at */proc/interrupts*. Determine +the IRQ number from the source code (see ``I8042_KBD_IRQ``) and verify +that there are two drivers registered at this interrupt line (which +means that we have a shared interrupt line): the i8042 initial driver +and our driver. + +.. note:: More details about the format of */proc/interrupts* can + be found in the `Interrupt statistics`_ section. + +Print a message inside the routine to make sure it is called. Compile +and reload the module into the kernel. Check that the interrupt handling +routine is called when you press the keyboard on the virtual machine, +using :command:`dmesg`. Also note that when you use the serial port no +keyboard interrupt is generated. + +.. attention:: To get access to the keyboard on the virtual machine + boot with "QEMU_DISPLAY=sdl make boot". + +3.
Store ASCII keys to buffer +----------------------------- + +Next, we want to collect the keystrokes in a buffer whose content we +will then send to the user space. For this routine we will add the +following in the interrupt handling: + +* capture the pressed keys (only pressed, ignore released) +* identify the ASCII characters. +* copy the ASCII characters corresponding to the keystrokes and store + them in the buffer of the device + +Follow the sections marked **TODO 3** in the skeleton. + +Reading the data register +......................... + +First, fill in the :c:func:`i8042_read_data` function to read the +``I8042_DATA_REG`` of the keyboard controller. The function +just needs to return the value of the register. The value of the +register is also called scancode, which is what is generated at each +keystroke. + +.. hint:: Read the ``I8042_DATA_REG`` register using :c:func:`inb` and + store the value in the local variable :c:type:`val`. + Revisit the `Accessing I/O ports`_ section. + +Call the :c:func:`i8042_read_data` in the +:c:func:`kbd_interrupt_handler` and print the value read. + +Print information about the keystrokes in the following format: + +.. code-block:: c + + pr_info("IRQ: %d, scancode = 0x%x (%u,%c)\n", + irq_no, scancode, scancode, scancode); + + +Where scancode is the value of the register read using the +:c:func:`i8042_read_data` function. + +Notice that the scancode (the value read from the register) is not the ASCII +character of the pressed key. We'll have to understand the scancode. + +Interpreting the scancode +......................... + +Note that the register value is a scancode, not the ASCII value of the +character pressed. Also note that an interrupt is sent both when the +key is pressed and when the key is released. We only need to select +the code when the key is pressed and then decode the ASCII +character. + +.. note:: To check the scancodes, we can use the showkey command (showkey + -s).
+ + In this form, the command will display the key scancodes for + 10 seconds after the last pressed key and then it will + stop. If you press and release a key you will get two + scancodes: one for the pressed key and one for the released + key. E.g.: + + * If you press the ENTER key, you will get the 0x1c (key pressed) + and 0x9c (for the released key) + * If you press the key a you will get the 0x1e (key pressed) + and 0x9e (for the key release) + * If you press b you will get 0x30 (key pressed) and 0xb0 + (for the release key) + * If you press the c key, you will get the 0x2e (key + pressed) and 0xae (for the released key) + * If you press the Shift key you will get the 0x2a (key + pressed) and 0xaa (for the released key) + * If you press the Ctrl key you will get the 0x1d (key + pressed) and 0x9d (for the release key) + + As also indicated in this `article + `_, a key + release scancode is 128 (0x80) higher than a key press + scancode. This is how we can distinguish between a press + key scancode and a release scancode. + + A scancode is translated into a keycode that matches a + key. A key press scancode and a key release scancode + have the same keycode. For the keys shown above we have + the following table: + + .. flat-table:: + + * - Key + - Key Press Scancode + - Key Release Scancode + - Keycode + + * - ENTER + - 0x1c + - 0x9c + - 0x1c (28) + + * - a + - 0x1e + - 0x9e + - 0x1e (30) + + * - b + - 0x30 + - 0xb0 + - 0x30 (48) + + * - c + - 0x2e + - 0xae + - 0x2e (46) + + * - Shift + - 0x2a + - 0xaa + - 0x2a (42) + + * - Ctrl + - 0x1d + - 0x9d + - 0x1d (29) + + The press / release check is performed in the is_key_press() + function and obtaining the ASCII character of a scancode + takes place in the get_ascii() function. + +In the interrupt handler check the scancode to see if the key is +pressed or released then determine the corresponding ASCII +character. + +.. hint:: To check for press / release, use :c:func:`is_key_press`.
+ Use the :c:func:`get_ascii` function to get the corresponding + ASCII code. Both functions expect the scancode. + + +.. hint:: To display the received information use the following + format. + + .. code-block:: c + + pr_info("IRQ %d: scancode=0x%x (%u) pressed=%d ch=%c\n", + irq_no, scancode, scancode, pressed, ch); + + Where scancode is the value of the data register, and ch is + the value returned by the get_ascii() function. + +Store characters to the buffer +............................... + +We want to collect the pressed characters (not the other keys) into +a circular buffer that can be consumed from user space. + +Update the interrupt handler to add a pressed ASCII character to the +end of the device buffer. If the buffer is full, the character will be +discarded. + +.. hint:: The device buffer is the field :c:type:`buf` in the device's + :c:type:`struct kbd`. To get the device data from the interrupt handler + use the following construct: + + .. code-block:: c + + struct kbd *data = (struct kbd *) dev_id; + + The buffer's dimension is located in :c:type:`struct kbd`'s field, + :c:type:`count`. The :c:type:`put_idx` and :c:type:`get_idx` fields + specify the next writing and reading index. Take a look at the + :c:func:`put_char` function's implementation to observe how the data is + added to the circular buffer. + +.. attention:: Synchronize the access to the buffer and the helper + indexes with a spinlock. + Define the spinlock in the device struct :c:type:`struct kbd` + and initialize it in :c:func:`kbd_init`. + + Use the :c:func:`spin_lock` and :c:func:`spin_unlock` functions + to protect the buffer in the interrupt handler. + + Revisit the `Locking`_ section. + +4. Reading the buffer +---------------------- + +In order to have access to the keylogger's data, we have to send it to +the user space. We will do this using the */dev/kbd* character device.
When +reading from this device, we will get the data from the buffer in the kernel +space, where we collected the keys pressed. + +For this step +follow the sections marked with **TODO 4** in the :c:func:`kbd_read` function. + +Implement :c:func:`get_char` in a similar way to :c:func:`put_char`. Be careful +when implementing the circular buffer. + +In the :c:func:`kbd_read` function copy the data from the buffer to the +userspace buffer. + +.. hint:: Use :c:func:`get_char` to read a character from the buffer + and :c:func:`put_user` to store it to the user buffer. + +.. attention:: In the read function, use :c:func:`spin_lock_irqsave` and + :c:func:`spin_unlock_irqrestore` for locking. + + Revisit the `Locking`_ section. + +.. attention:: We cannot use :c:func:`put_user` or :c:func:`copy_to_user` + while holding the lock, as userspace access is not permitted from + atomic contexts. + + For more info, read the :ref:`Access to the address space of the + process section <_access_to_process_address_space>` in the + previous lab. + +For testing, you will need to create the */dev/kbd* character device +file using :command:`mknod` before reading from it. The device major and +minor are defined as ``KBD_MAJOR`` and ``KBD_MINOR``: + +.. code-block:: shell + + mknod /dev/kbd c 42 0 + +Build, copy and boot the virtual machine and load the module. Test it +using the command: + +.. code-block:: shell + + cat /dev/kbd + + +5. Reset the buffer +------------------- + +Reset the buffer if the device is written to. For this step follow the +sections marked with **TODO 5** in the skeleton. + +Implement :c:func:`reset_buffer` and add the write operation to *kbd_fops*. + +.. attention:: In the write function, use :c:func:`spin_lock_irqsave` and + :c:func:`spin_unlock_irqrestore` for locking when resetting the + buffer. + + Revisit the `Locking`_ section. + +For testing, you will need to create the */dev/kbd* character device +file using :command:`mknod` before reading from it.
The device major and +minor are defined as ``KBD_MAJOR`` and ``KBD_MINOR``: + +.. code-block:: shell + + mknod /dev/kbd c 42 0 + +Build, copy and boot the virtual machine and load the module. +Test it using the command: + +.. code-block:: shell + + cat /dev/kbd + +Press some keys, then run the command :command:`echo "clear" > /dev/kbd`. +Check the buffer's content again. It should be reset. + +Extra Exercises +=============== + +1. kfifo +--------- + +Implement a keylogger using the +`kfifo API `_. + +.. hint:: Follow the `API call examples from the kernel code `_. + For example, the file `bytestream-examples.c `_. diff --git a/Documentation/teaching/labs/introduction.rst b/Documentation/teaching/labs/introduction.rst new file mode 100644 index 00000000000000..1f93fb49b324f2 --- /dev/null +++ b/Documentation/teaching/labs/introduction.rst @@ -0,0 +1,799 @@ +============ +Introduction +============ + +Lab objectives +============== + +* presenting the rules and objectives of the Operating Systems 2 lab +* introducing the lab documentation +* introducing the Linux kernel and related resources + +Keywords +======== + +* kernel, kernel programming +* Linux, vanilla, http://www.kernel.org +* cscope, LXR +* gdb, /proc/kcore, addr2line, dump\_stack + +About this laboratory +===================== + +The Operating Systems 2 lab is a kernel programming and driver development lab. +The objectives of the laboratory are: + +* deepening the notions presented in the course +* presentation of kernel programming interfaces (kernel API) +* gaining documentation, development and debugging skills in a freestanding + environment +* acquiring knowledge and skills for driver development + +A laboratory will present a set of concepts, applications and commands +specific to a given problem. The lab will start with a presentation +(each lab will have a set of slides) (15 minutes) and the remaining +time will be allocated to the lab exercises (80 minutes).
+ +For best laboratory performance, we recommend that you read the related slides. +To fully understand a laboratory, we recommend going through the lab support. For +in-depth study, use the supporting documentation. + +Documentation +============= + +- Linux + + - `Linux Kernel Development, 3rd + Edition `__ + - `Linux Device Drivers, 3rd + Edition `__ + - `Essential Linux Device + Drivers `__ + +- General + + - `mailing list `__ + (`searching the mailing list `__) + +Source code navigation +====================== + +.. _cscope_intro: + +cscope +------ + +`Cscope `__ is a tool for +efficient navigation of C sources. To use it, a cscope database must +be generated from the existing sources. In a Linux tree, the command +:command:`make ARCH=x86 cscope` is sufficient. Specification of the +architecture through the ARCH variable is optional but recommended; +otherwise, some architecture dependent functions will appear multiple +times in the database. + +You can build the cscope database with the command :command:`make +ARCH=x86 COMPILED_SOURCE=1 cscope`. This way, the cscope database will +only contain symbols that have already been used in the compile +process before, thus resulting in better performance when searching +for symbols. + +Cscope can also be used as stand-alone, but it is more useful when +combined with an editor. To use cscope with :command:`vim`, it is necessary to +install both packages and add the following lines to the file +:file:`.vimrc` (the machine in the lab already has the settings): + +.. code-block:: vim + + if has("cscope") + " Look for a 'cscope.out' file starting from the current directory, + " going up to the root directory. + let s:dirs = split(getcwd(), "/") + while s:dirs != [] + let s:path = "/" . join(s:dirs, "/") + if (filereadable(s:path . "/cscope.out")) + execute "cs add " . s:path . "/cscope.out " . s:path . 
" -v" + break + endif + let s:dirs = s:dirs[:-2] + endwhile + + set csto=0 " Use cscope first, then ctags + set cst " Only search cscope + set csverb " Make cs verbose + + nmap ``s :cs find s ``=expand("``")```` + nmap ``g :cs find g ``=expand("``")```` + nmap ``c :cs find c ``=expand("``")```` + nmap ``t :cs find t ``=expand("``")```` + nmap ``e :cs find e ``=expand("``")```` + nmap ``f :cs find f ``=expand("``")```` + nmap ``i :cs find i ^``=expand("``")``$`` + nmap ``d :cs find d ``=expand("``")```` + nmap :cnext + nmap :cprev + + " Open a quickfix window for the following queries. + set cscopequickfix=s-,c-,d-,i-,t-,e-,g- + endif + +The script searches for a file called :file:`cscope.out` in the current directory, or +in parent directories. If :command:`vim` finds this file, you can use the shortcut :code:`Ctrl +]` +or :code:`Ctrl+\ g` (the combination control-\\ followed by g) to jump directly to +the definition of the word under the cursor (function, variable, structure, etc.). +Similarly, you can use :code:`Ctrl+\ s` to go where the word under the cursor is used. + +You can take a cscope-enabled :file:`.vimrc` file (also contains other goodies) from +https://github.com/ddvlad/cfg/blob/master/\_vimrc. +The following guidelines are based on this file, but also show basic :command:`vim` commands +that have the same effect. + +If there are more than one results (usually there are) you can move between them +using :code:`F6` and :code:`F5` (:code:`:ccnext` and :code:`:cprev`). +You can also open a new panel showing the results using :code:`:copen`. To close +the panel, use the :code:`:cclose` command. + +To return to the previous location, use :code:`Ctrl+o` (o, not zero). +The command can be used multiple times and works even if cscope changed the +file you are currently editing. + +To go to a symbol definition directly when :command:`vim` starts, use :code:`vim -t ` +(for example :code:`vim -t task_struct`). 
Otherwise, if you started :command:`vim` and want +to search for a symbol by name, use :code:`cs find g <symbol_name>` (for example +:code:`cs find g task_struct`). + +If you found more than one result and opened a panel showing all the matches +(using :code:`:copen`) and you want to find a symbol of type structure, +it is recommended to search in the results panel (using :code:`/` -- slash) +the character :code:`{` (opening brace). + +.. important:: + You can get a summary of all the :command:`cscope` commands using :command:`:cs help`. + + For more info, use the :command:`vim` built-in help command: :command:`:h cscope` or :command:`:h copen`. + +If you use :command:`emacs`, install the :code:`xcscope-el` package and +add the following lines in :file:`~/.emacs`. + +.. code-block:: vim + + (require 'xcscope) + (cscope-setup) + +These commands will activate cscope for the C and C++ modes automatically. +:code:`C-c s` is the key bindings prefix and :code:`C-c s s` is used to +search for a symbol (if you call it when the cursor is over a word, +it will use that). For more details, check https://github.com/dkogan/xcscope.el + +Kscope +~~~~~~ + +For a simpler interface, `Kscope `__ +is a cscope frontend which uses Qt. It is lightweight, very fast and very +easy to use. It allows searching using regular expressions, call graphs, etc. +Kscope is no longer maintained. + +There is also a `port `__ +of version 1.6 for Qt4 and KDE 4 which keeps the integration of the text +editor Kate and is easier to use than the last version on SourceForge. + +LXR Cross-Reference +------------------- + +LXR (LXR Cross-Reference) is a tool that allows indexing and +referencing the symbols in the source code of a program using +a web interface. The web interface shows links to +locations in files where a symbol is defined or used. The development website +for LXR is http://sourceforge.net/projects/lxr. Similar tools +are `OpenGrok `__ and +`Gonzui `__.
+ +Although LXR was originally intended for the Linux kernel sources, it is +also used in the sources of `Mozilla `__, +`Apache HTTP Server `__ and +`FreeBSD `__. + +There are a number of sites that use LXR for cross-referencing the +sources of the Linux kernel, the main site being `the original site of +development `__ which does not work anymore. You can +use `https://elixir.bootlin.com/ `__. + +LXR allows searching by identifier (symbol), by free text +or by file name. The main feature and, at the same +time, the main advantage provided is the ease of finding the declaration +of any global identifier. This way, it facilitates quick access to function +declarations, variables, macro definitions and the code can be easily +navigated. Also, the fact that it can detect what code areas are affected +when a variable or function is changed is a real advantage in the development +and debugging phase. + +SourceWeb +--------- + +`SourceWeb `__ is a source code indexer +for C and C++. It uses the +`framework `__ +provided by the Clang compiler to index the code. + +The main difference between cscope and SourceWeb is the fact that SourceWeb +is, in a way, a compiler pass. SourceWeb doesn't index all the code, but +only the code that was actually compiled by the compiler. This way, some +problems are eliminated, such as ambiguities about which variant of a function +defined in multiple places is used. This also means that the indexing takes +more time, because the compiled files must pass one more time through +the indexer to generate the references. + +Usage example: + +.. code-block:: bash + + make oldconfig + sw-btrace make -j4 + sw-btrace-to-compile-db + sw-clang-indexer --index-project + sourceweb index + +:file:`sw-btrace` is a script that adds the :file:`libsw-btrace.so` +library to :code:`LD_PRELOAD`.
This way, the library is loaded by +every process started by :code:`make` (basically, the compiler), +registers the commands used to start the processes and generates +a file called :file:`btrace.log`. This file is then used by +:code:`sw-btrace-to-compile-db` which converts it to a format defined +by clang: `JSON Compilation Database `__. +The JSON Compilation Database resulting from the above steps is then +used by the indexer, which makes one more pass through the compiled +source files and generates the index used by the GUI. + +Word of advice: don't index the sources you are working with, but use +a copy, because SourceWeb doesn't have, at this moment, the capability +to regenerate the index for a single file and you will have to regenerate +the complete index. + +Debugging +========= + +Debugging a kernel is a much more difficult process than the debugging +of a program, because there is no support from the operating system. +This is why this process is usually done using two computers, connected +through serial interfaces. + +.. _gdb_intro: + +gdb (Linux) +----------- + +A simpler debug method on Linux, but with many disadvantages, +is local debugging, using `gdb `__, +the uncompressed kernel image (:file:`vmlinux`) and :file:`/proc/kcore` +(the real-time kernel image). This method is usually used to inspect +the kernel and detect certain inconsistencies while it runs. The +method is useful especially if the kernel was compiled using the +:code:`-g` option, which keeps debug information. Some well-known +debug techniques can't be used by this method, such as breakpoints +or data modification. + +.. note:: Because :file:`/proc` is a virtual filesystem, :file:`/proc/kcore` + does not physically exist on the disk. It is generated on-the-fly + by the kernel when a program tries to access :file:`/proc/kcore`. + + It is used for debugging purposes.
+ + From :command:`man proc`, we have: + + :: + + /proc/kcore + This file represents the physical memory of the system and is stored in the ELF core file format. With this pseudo-file, and + an unstripped kernel (/usr/src/linux/vmlinux) binary, GDB can be used to examine the current state of any kernel data struc‐ + tures. + +The uncompressed kernel image offers information about the data structures +and symbols it contains. + +.. code-block:: bash + + student@eg106$ cd ~/src/linux + student@eg106$ file vmlinux + vmlinux: ELF 32-bit LSB executable, Intel 80386, ... + student@eg106$ nm vmlinux | grep sys_call_table + c02e535c R sys_call_table + student@eg106$ cat System.map | grep sys_call_table + c02e535c R sys_call_table + +The :command:`nm` utility is used to show the symbols in an object or +executable file. In our case, :file:`vmlinux` is an ELF file. Alternatively, +we can use the file :file:`System.map` to view information about the +symbols in the kernel. + +Then we use :command:`gdb` to inspect the symbols using the uncompressed +kernel image. A simple :command:`gdb` session is the following: + +.. code-block:: bash + + student@eg106$ cd ~/src/linux + student@eg106$ gdb --quiet vmlinux + Using host libthread_db library "/lib/tls/libthread_db.so.1". 
+ (gdb) x/x 0xc02e535c + 0xc02e535c ``: 0xc011bc58 + (gdb) x/16 0xc02e535c + 0xc02e535c ``: 0xc011bc58 0xc011482a 0xc01013d3 0xc014363d + 0xc02e536c ``: 0xc014369f 0xc0142d4e 0xc0142de5 0xc011548b + 0xc02e537c ``: 0xc0142d7d 0xc01507a1 0xc015042c 0xc0101431 + 0xc02e538c ``: 0xc014249e 0xc0115c6c 0xc014fee7 0xc0142725 + (gdb) x/x sys_call_table + 0xc011bc58 ``: 0xffe000ba + (gdb) x/x &sys_call_table + 0xc02e535c ``: 0xc011bc58 + (gdb) x/16 &sys_call_table + 0xc02e535c ``: 0xc011bc58 0xc011482a 0xc01013d3 0xc014363d + 0xc02e536c ``: 0xc014369f 0xc0142d4e 0xc0142de5 0xc011548b + 0xc02e537c ``: 0xc0142d7d 0xc01507a1 0xc015042c 0xc0101431 + 0xc02e538c ``: 0xc014249e 0xc0115c6c 0xc014fee7 0xc0142725 + (gdb) x/x sys_fork + 0xc01013d3 ``: 0x3824548b + (gdb) disass sys_fork + Dump of assembler code for function sys_fork: + 0xc01013d3 ``: mov 0x38(%esp),%edx + 0xc01013d7 ``: mov $0x11,%eax + 0xc01013dc ``: push $0x0 + 0xc01013de ``: push $0x0 + 0xc01013e0 ``: push $0x0 + 0xc01013e2 ``: lea 0x10(%esp),%ecx + 0xc01013e6 ``: call 0xc0111aab `` + 0xc01013eb ``: add $0xc,%esp + 0xc01013ee ``: ret + End of assembler dump. + +It can be noticed that the uncompressed kernel image was used as an argument +for :command:`gdb`. The image can be found in the root of the kernel sources +after compilation. + +A few commands used for debugging using :command:`gdb` are: + +- :command:`x` (examine) - Used to show the contents of the memory area + whose address is specified as an argument to the command (this address + can be the value of a physical address, a symbol or the address of a + symbol). It can take as arguments (preceded by :code:`/`): the format + to display the data in (:code:`x` for hexadecimal, :code:`d` for + decimal, etc.), how many memory units to display and the size of a + memory unit. + +- :command:`disassemble` - Used to disassemble a function. + +- :command:`p` (print) - Used to evaluate and show the value of an + expression. 
The format to show the data in can be specified as + an argument (:code:`/x` for hexadecimal, :code:`/d` for decimal, etc.). + +The analysis of the kernel image is a method of static analysis. If we +want to perform dynamic analysis (analyzing how the kernel runs, not +only its static image) we can use :file:`/proc/kcore`; this is a dynamic +image (in memory) of the kernel. + +.. code-block:: bash + + student@eg106$ gdb ~/src/linux/vmlinux /proc/kcore + Core was generated by `root=/dev/hda3 ro'. + #0 0x00000000 in ?? () + (gdb) p sys_call_table + $1 = -1072579496 + (gdb) p /x sys_call_table + $2 = 0xc011bc58 + (gdb) p /x &sys_call_table + $3 = 0xc02e535c + (gdb) x/16 &sys_call_table + 0xc02e535c ``: 0xc011bc58 0xc011482a 0xc01013d3 0xc014363d + 0xc02e536c ``: 0xc014369f 0xc0142d4e 0xc0142de5 0xc011548b + 0xc02e537c ``: 0xc0142d7d 0xc01507a1 0xc015042c 0xc0101431 + 0xc02e538c ``: 0xc014249e 0xc0115c6c 0xc014fee7 0xc0142725 + +Using the dynamic image of the kernel is useful for detecting `rootkits `__. + +- `Linux Device Drivers 3rd Edition - Debuggers and Related Tools `__ +- `Detecting Rootkits and Kernel-level Compromises in Linux `__ +- `User-Mode Linux `__ + +Getting a stack trace +--------------------- + +Sometimes, you will want information about the path the execution +took to reach a certain point. You can determine this information using +:command:`cscope` or LXR, but some functions are called from many +execution paths, which makes this method difficult. + +In these situations, it is useful to get a stack trace, which can be +simply done using the function :code:`dump_stack()`. + +Documentation +============= + +Kernel development is a difficult process, compared to user space +programming. The API is different and the complexity of the subsystems +in the kernel requires additional preparation. The associated documentation +is heterogeneous, sometimes requiring the inspection of multiple sources +to have a more complete understanding of a certain aspect. 
+ +The main advantages of the Linux kernel are the access to sources and +the open development system. Because of this, the Internet offers a +large amount of documentation for the kernel. + +A few links related to the Linux kernel are shown below: + +- `KernelNewbies `__ +- `KernelNewbies - Kernel Hacking `__ +- `Kernel Analysis - HOWTO `__ +- `Linux Kernel Programming `__ +- `Linux kernel - Wikibooks `__ + +The links are not comprehensive. Using `The Internet `__ and +`kernel source code `__ is essential. + +Exercises +========= + +Remarks +------- + +.. note:: + + - Usually, the steps used to develop a kernel module are the + following: + + - editing the module source code (on the physical machine); + - module compilation (on the physical machine); + - generation of the minimal image for the virtual machine; + this image contains the kernel, your module, busybox and + possibly test programs; + - starting the virtual machine using QEMU; + - running the tests in the virtual machine. + + - When using cscope, use :file:`~/src/linux`. + If there is no :file:`cscope.out` file, you can generate it using + the command :command:`make ARCH=x86 cscope`. + + - You can find more details about the virtual machine at + :ref:`vm_link`. + +.. important:: + Before solving an exercise, **carefully** read all its bullets. + +1. Booting the virtual machine +------------------------------ + +A summary of the virtual machine infrastructure: + +- :file:`~/src/linux` - Linux kernel sources, needed to + compile modules. The directory contains the file :file:`cscope.out`, + used for navigation in the source tree. + +- :file:`~/src/linux/tools/labs/qemu` - scripts and auxiliary + files used to generate and run the QEMU VM. + +To start the VM, run :command:`make boot` in the directory :file:`~/src/linux/tools/labs`: + +.. 
code-block:: shell + + student@eg106:~$ cd ~/src/linux/tools/labs + student@eg106:~/src/linux/tools/labs$ make boot + +By default, you will not get a prompt or any graphical interface, but you can connect to +a console exposed by the virtual machine using :command:`minicom` or :command:`screen`. + +.. code-block:: shell + + student@eg106:~/src/linux/tools/labs$ minicom -D serial.pts + + + + qemux86 login: + Poky (Yocto Project Reference Distro) 2.3 qemux86 /dev/hvc0 + +Alternatively, you can start the virtual machine with graphical interface support, using +the :command:`QEMU_DISPLAY=sdl make boot`. + +.. note:: + To access the virtual machine, at the login prompt, enter the + username :code:`root`; there is no need to enter a password. + The virtual machine will start with the permissions of the + root account. + +2. Adding and using a virtual disk +---------------------------------- + +.. note:: If you don't have the file :file:`mydisk.img`, you can download + it from the address http://elf.cs.pub.ro/so2/res/laboratoare/mydisk.img. + The file must be placed in :file:`tools/labs`. + +In the :file:`~/src/linux/tools/labs` directory, you have a new virtual +machine disk, in the file :file:`mydisk.img`. We want to add the disk +to the virtual machine and use it within the virtual machine. + +Edit :file:`qemu/Makefile` and add :code:`-drive file=mydisk.img,if=virtio,format=raw` +to the :code:`QEMU_OPTS` variable. + +.. note:: There are already two disks added to qemu (disk1.img and disk2.img). You will need + to add the new one after them. In this case, the new disk can be accessed as + :file:`/dev/vdd` (vda is the root partition, vdb is disk1 and vdc is disk2). + +.. hint:: You do not need to manually create the entry for the new disk in :file:`/dev` + because the virtual machine uses :command:`devtmpfs`. + +Run :code:`make` in :file:`tools/labs` to boot the virtual machine. +Create :file:`/test` directory and try to mount the new disk: + +.. 
code-block:: bash + + mkdir /test + mount /dev/vdd /test + +The reason why we can not mount the virtual disk is that we do not have support in the +kernel for the filesystem with which the :file:`mydisk.img` is formatted. You will need +to identify the filesystem for :file:`mydisk.img` and compile kernel support for that filesystem. + +Close the virtual machine (close the QEMU window, you do not need to use another command). +Use the :command:`file` command on the physical machine to find out with which filesystem +the :file:`mydisk.img` file is formatted. You will identify the :command:`btrfs` file system. + +You will need to enable :command:`btrfs` support in the kernel and recompile the kernel image. + +.. warning:: If you receive an error while executing the :command:`make menuconfig` + command, you probably do not have the :command:`libncurses5-dev` + package installed. Install it using the command: + + :: + + sudo apt-get install libncurses5-dev + +.. hint:: Enter the :file:`~/src/linux/` subdirectory. Run :command:`make menuconfig` + and go to the *File systems* section. Enable *Btrfs filesystem support*. + You will need to use the builtin option (not the module), i.e. :command:`<*>` must appear + next to the option (**not** :command:`<M>`). + + Save the configuration you have made. Use the default configuration file (:file:`config`). + + In the kernel source subdirectory (:file:`~/src/linux/`) recompile using the command: + + :: + + make + + To wait less, you can use the :command:`-j` option to run multiple jobs in parallel. + Generally, it is recommended to use :command:`number of CPUs+1`: + + :: + + make -j5 + +After the kernel recompilation finishes, **restart** the QEMU virtual machine: +that is, launch the :command:`make` command in the subdirectory. You +do not need to copy anything, because the :file:`bzImage` file is a symlink to the kernel +image you just recompiled. 
+ +Inside the QEMU virtual machine, repeat the :command:`mkdir` and :command:`mount` operations. +With support for the :command:`btrfs` filesystem, now :command:`mount` will finish successfully. + +.. note:: When doing your homework, there is no need to recompile the kernel + because you will only use kernel modules. However, it is important + to be familiar with configuring and recompiling a kernel. + + If you still plan to recompile the kernel, make a backup of the bzImage + file (follow the link in ~/src/linux for the full path). This will allow + you to return to the initial setup in order to have an environment + identical to the one used by vmchecker. + +3. GDB and QEMU +--------------- + +We can investigate and troubleshoot the QEMU virtual machine in real time. + +.. note:: You can also use the :command:`GDB Dashboard` plugin for a user-friendly interface. + :command:`gdb` must be compiled with Python support. + + In order to install it, you can just run: + :: + + wget -P ~ git.io/.gdbinit + +To do this, we start the QEMU virtual machine first. Then, we can connect +with :command:`gdb` to **a running QEMU virtual machine** using the command + +:: + + make gdb + +We used the QEMU command with the :command:`-s` parameter, which means +listening to port :code:`1234` from :command:`gdb`. We can do debugging +using a **remote target** for :command:`gdb`. The existing :file:`Makefile` +takes care of the details. + +When you attach a debugger to a process, the process is suspended. +You can add breakpoints and inspect the current status of the process. + +Attach to the QEMU virtual machine (using the :command:`make gdb` command) +and place a breakpoint in the :code:`sys_access` function using the +following command in the :command:`gdb` console: + +:: + + break sys_access + +At this time, the virtual machine is suspended. 
To continue executing it (up to the possible call +of the :code:`sys_access` function), use the command: + +:: + + continue + +in the :command:`gdb` console. + +At this time, the virtual machine is active and has a usable console. +To make a :code:`sys_access` call, issue a :command:`ls` command. +Note that the virtual machine was again suspended by :command:`gdb` +and the corresponding :code:`sys_access` callback message appeared within the :command:`gdb` console. + +Trace code execution using :command:`step` instruction, :command:`continue` or :command:`next` +instruction. You probably do not understand everything that happens, so use commands +such as :command:`list` and :command:`backtrace` to trace the execution. + +.. hint:: At the :command:`gdb` prompt, you can press :command:`Enter` + (without anything else) to rerun the last command. + +4. GDB spelunking +----------------- + +Use :command:`gdb` to display the source code of the function that creates kernel threads +(:code:`kernel_thread`). + +.. note:: You can use GDB for static kernel analysis using, in the kernel source directory, + a command such as: + + :: + + gdb vmlinux + + Go over the `gdb (Linux) <#gdb-linux>`__ section of the lab. + +Use :command:`gdb` to find the address of the :code:`jiffies` variable in memory and its contents. +The :code:`jiffies` variable holds the number of ticks (clock beats) since the system started. + +.. hint:: To track the value of the jiffies variable, use dynamic analysis in :command:`gdb` + by running the command: + + :: + + make gdb + + as in the previous exercise. + + Go over the `gdb (Linux) <#gdb-linux>`__ section of the lab. + +.. hint:: The :code:`jiffies` is a 64-bit variable. + You can see that its address is the same as the :code:`jiffies_64` variable. 
+ + To explore the contents of a 64-bit variable, use in the :command:`gdb` console the command: + + :: + + x/gx & jiffies + + If you wanted to display the contents of the 32-bit variable, + you would use in the :command:`gdb` console the command: + + :: + + x/wx & jiffies + +5. Cscope spelunking +-------------------- + +Use LXR or cscope in the :file:`~/src/linux/` directory to discover +the location of certain structures or functions. + +Cscope index files are already generated. Use :command:`vim` and other related commands +to scroll through the source code. For example, use the command: + +:: + + vim + +for opening the :command:`vim` editor. Afterwards, inside the editor, use commands such as: + +:command:`:cs find g task\_struct`. + +Find the file in which the following data types are defined: + +- ``struct task_struct`` + +- ``struct semaphore`` + +- ``struct list_head`` + +- ``spinlock_t`` + +- ``struct file_system_type`` + +.. hint:: For a certain structure, only its name needs to be searched. + + For instance, in the case of :command:`struct task_struct`, + search for the :command:`task_struct` string. + +Usually, you will get multiple matches. To locate the one you are interested in, do the following: + +#. List all matches by using, in :command:`vim`, the :command:`:copen` command. + +#. Look for the right match (where the structure is defined) by looking for an open brace + (:command:`{`), a single character on the structure definition line. To search for the open + brace, use in :command:`vim` the construction :command:`/{`. + +#. On the respective line, press :command:`Enter` to get into the source code where the variable + is defined. + +#. Close the secondary window using the :command:`:cclose` command. + +Find the file in which the following global kernel variables are declared: + +- ``sys_call_table`` + +- ``file_systems`` + +- ``current`` + +- ``chrdevs`` + +.. 
hint:: To do this, use a :command:`vim` command with the syntax: + + :command:`:cs f g ` + + where :command:`` is the name of the symbol being searched. + +Find the file in which the following functions are declared: + +- ``copy_from_user`` + +- ``vmalloc`` + +- ``schedule_timeout`` + +- ``add_timer`` + +.. hint:: To do this, use a :command:`vim` command with the syntax: + + :command:`:cs f g ` + + where :command:`` is the name of the symbol being searched. + +Scroll through the following sequence of structures: + +- ``struct task_struct`` + +- ``struct mm_struct`` + +- ``struct vm_area_struct`` + +- ``struct vm_operations_struct`` + +That is, you access a structure and then you find fields with the data type of the +next structure, access the respective fields and so on. +Note in which files these structures are defined; this will be useful to the following labs. + + +.. hint:: In order to search for a symbol in :command:`vim` (with :command:`cscope` support) + when the cursor is placed on it, use the :command:`Ctrl+]` keyboard shortcut. + + To return to the previous match (the one before search/jump), use the + :command:`Ctrl+o` keyboard shortcut. + + To move forward with the search (to return to matches before :command:`Ctrl+o`), + use the :command:`Ctrl+i` keyboard shortcut. + +Following the above instructions, find and go through the function call sequence: + +- ``bio_alloc`` + +- ``bio_alloc_bioset`` + +- ``bvec_alloc`` + +- ``kmem_cache_alloc`` + +- ``slab_alloc`` + +.. note:: Read `cscope <#cscope>`__ or `LXR Cross-Reference <#lxr-cross-reference>`__ sections of the lab. 
diff --git a/Documentation/teaching/labs/kernel-virtmem-map.png b/Documentation/teaching/labs/kernel-virtmem-map.png new file mode 100644 index 00000000000000..25ffb7a60e94de Binary files /dev/null and b/Documentation/teaching/labs/kernel-virtmem-map.png differ diff --git a/Documentation/teaching/labs/kernel_api.rst b/Documentation/teaching/labs/kernel_api.rst new file mode 100644 index 00000000000000..b9ca9195333fbe --- /dev/null +++ b/Documentation/teaching/labs/kernel_api.rst @@ -0,0 +1,855 @@ +========== +Kernel API +========== + +Lab objectives +============== + + * Familiarize yourself with the basic Linux kernel API + * Description of memory allocation mechanisms + * Description of locking mechanisms + +Overview +======== + +Inside the current lab we present a set of concepts and basic functions required +for starting Linux kernel programming. It is important to note that kernel +programming differs greatly from user space programming. The kernel is a +stand-alone entity that can not use libraries in user-space (not even libc). +As a result, the usual user-space functions (printf, malloc, free, open, read, +write, memcpy, strcpy, etc.) can no longer be used. In conclusion, kernel +programming is based on a totally new and independent API that is unrelated to +the user-space API, whether we refer to POSIX or ANSI C (standard C language +library functions). + +Accessing memory +================ + +An important difference in kernel programming is how to access and allocate +memory. Due to the fact that kernel programming is very close to the physical +machine, there are important rules for memory management. 
First, it works with +several types of memory: + + * Physical memory + * Virtual memory from the kernel address space + * Virtual memory from a process's address space + * Resident memory - we know for sure that the accessed pages are present in + physical memory + +Virtual memory in a process's address space can not be considered resident due +to the virtual memory mechanisms implemented by the operating system: pages may +be swapped or simply may not be present in physical memory as a result of the +demand paging mechanism. The memory in the kernel address space can be resident +or not. Both the data and code segments of a module and the kernel stack of a +process are resident. Dynamic memory may or may not be resident, depending on +how it is allocated. + +When working with resident memory, things are simple: memory can be accessed at +any time. But if working with non-resident memory, then it can only be accessed +from certain contexts. Non-resident memory can only be accessed from the +process context. Accessing non-resident memory from the context of an +interrupt has unpredictable results and, therefore, when the operating +system detects such access, it will take drastic measures: blocking or +resetting the system to prevent serious corruption. + +The virtual memory of a process can not be accessed directly from the kernel. +In general, it is totally discouraged to access the address space of a process, +but there are situations where a device driver needs to do it. The typical case +is where the device driver needs to access a buffer from the user-space. In +this case, the device driver must use special features and not directly access +the buffer. This is necessary to prevent access to invalid memory areas. + +Another difference from the user-space scheduling, relative to memory, is due to +the stack, a stack whose size is fixed and limited. A stack of 4K is used in +Linux, and a stack of 12K is used in Windows. 
For this reason, the +allocation of large structures on the stack or the use of recursive calls should +be avoided. + +Contexts of execution +===================== + +In relation to kernel execution, we distinguish two contexts: process context +and interrupt context. We are in the process context when we run code as a +result of a system call or when we run in the context of a kernel thread. When +we run in a routine to handle an interrupt or a deferrable action, we run in +an interrupt context. + +Some of the kernel API calls can block the current process. Common examples are +using a semaphore or waiting for a condition. In this case, the process is +put into the ``WAITING`` state and another process is run. An interesting +situation occurs when a function that can cause the current process to be +suspended is called from an interrupt context. In this case, there is no +current process, and therefore the results are unpredictable. Whenever the +operating system detects this condition, it will generate an error condition that +will cause the operating system to shut down. + +Locking +======= + +One of the most important features of kernel programming is parallelism. Linux +supports SMP systems with multiple processors and kernel preemptivity. This +makes kernel programming more difficult because access to global variables must +be synchronized with either spinlock primitives or blocking primitives. Although +it is recommended to use blocking primitives, they can not be used in an +interrupt context, so the only locking solution in the context of an interrupt +is spinlocks. + +Spinlocks are used in order to achieve mutual exclusion. When a spinlock can not get +access to the critical region, it does not suspend the current process, but it +uses the busy-waiting mechanism (waiting in a :c:func:`while` loop for the lock +to be released). 
+The code that runs in the critical region protected by a spinlock is not allowed +to suspend the current process (it must adhere to the execution conditions in +the interrupt context). Moreover, the CPU will not be released except for +the case of an interrupt. Due to the mechanism used, it is important that a +spinlock is held for as little time as possible. + +Preemptivity +============ + +Linux uses preemptive kernels. The notion of preemptive multitasking should not +be confused with the notion of a preemptive kernel. The notion of preemptive +multitasking refers to the fact that the operating system forcefully interrupts +a process running in user space when its quantum (time slice) expires, in order +to run another process. +A kernel is preemptive if a process running in kernel mode (as a result of a +system call) can be interrupted so that another process can be run. + +Because of preemptivity, when we share resources between two portions of code +that can run from different process contexts, we need to protect ourselves with +synchronization primitives, even in the case of a single processor. + +Linux Kernel API +================ + +Convention indicating errors +---------------------------- + +For Linux kernel programming, the convention used for calling functions to +indicate success is the same as in UNIX programming: 0 for success, or a value +other than 0 for failure. +For failures, negative values are returned as shown in the example below: + +.. code-block:: c + + if (alloc_memory() != 0) + return -ENOMEM; + + if (user_parameter_valid() != 0) + return -EINVAL; + +The exhaustive list of errors and a summary explanation can be found in +:file:`include/asm-generic/errno-base.h` and in +:file:`include/asm-generic/errno.h`. 
+ +Strings of characters +--------------------- + +In Linux, the kernel programmer is provided with the usual routine functions: +:c:func:`strcpy`, :c:func:`strncpy`, :c:func:`strlcpy`, :c:func:`strcat`, +:c:func:`strncat`, :c:func:`strlcat`, :c:func:`strcmp`, :c:func:`strncmp`, +:c:func:`strnicmp`, :c:func:`strchr`, :c:func:`strnchr`, :c:func:`strrchr`, +:c:func:`strstr`, :c:func:`strlen`, :c:func:`memset`, :c:func:`memmove`, +:c:func:`memcmp`, etc. These functions are declared in the +:file:`include/linux/string.h` header and are implemented in the kernel in the +:file:`lib/string.c` file. + +printk +------ + +The printf equivalent in the kernel is printk, defined in +:file:`include/linux/printk.h`. The :c:func:`printk` syntax is very similar +to :c:func:`printf`. The first +parameter of :c:func:`printk` decides the log category in which the current log +falls into: + +.. code-block:: c + + #define KERN_EMERG "<0>" /* system is unusable */ + #define KERN_ALERT "<1>" /* action must be taken immediately */ + #define KERN_CRIT "<2>" /* critical conditions */ + #define KERN_ERR "<3>" /* error conditions */ + #define KERN_WARNING "<4>" /* warning conditions */ + #define KERN_NOTICE "<5>" /* normal but significant condition */ + #define KERN_INFO "<6>" /* informational */ + #define KERN_DEBUG "<7>" /* debug-level messages */ + +Thus, a warning message in the kernel would be sent with: + +.. code-block:: c + + printk(KERN_WARNING "my_module input string %s\n", buff); + + +If the logging level is missing from the :c:func:`printk` call, logging is done +with the default level at the time of the call. One thing to keep in mind is +that messages sent with :c:func:`printk` are only visible on the console if and +only if their level exceeds the default level set on the console. + +To reduce the size of lines when using :c:func:`printk`, it is recommended to +use the following help functions instead of directly using the :c:func:`printk` +call: + +.. 
code-block:: c + + pr_emerg(fmt, ...); /* similar to printk(KERN_EMERG pr_fmt(fmt), ...); */ + pr_alert(fmt, ...); /* similar to printk(KERN_ALERT pr_fmt(fmt), ...); */ + pr_crit(fmt, ...); /* similar to printk(KERN_CRIT pr_fmt(fmt), ...); */ + pr_err(fmt, ...); /* similar to printk(KERN_ERR pr_fmt(fmt), ...); */ + pr_warning(fmt, ...); /* similar to printk(KERN_WARNING pr_fmt(fmt), ...); */ + pr_warn(fmt, ...); /* similar to printk(KERN_WARNING pr_fmt(fmt), ...); */ + pr_notice(fmt, ...); /* similar to printk(KERN_NOTICE pr_fmt(fmt), ...); */ + pr_info(fmt, ...); /* similar to printk(KERN_INFO pr_fmt(fmt), ...); */ + +A special case is :c:func:`pr_debug` that calls the :c:func:`printk` function +only when the :c:macro:`DEBUG` macro is defined or if dynamic debugging is used. + + +Memory allocation +----------------- + +In Linux only resident memory can be allocated, using the :c:func:`kmalloc` call. +A typical :c:func:`kmalloc` call is presented below: + +.. code-block:: c + + #include <linux/slab.h> + + string = kmalloc (string_len + 1, GFP_KERNEL); + if (!string) { + //report error: -ENOMEM; + } + +As you can see, the first parameter indicates the size in bytes of the allocated +area. The function returns a pointer to a memory area that can be directly used +in the kernel, or :c:macro:`NULL` if memory could not be allocated. The second +parameter specifies how allocation should be done and the most commonly used +values for this are: + + * :c:data:`GFP_KERNEL` - using this value may cause the current process to + be suspended. Thus, it can not be used in the interrupt context. + * :c:data:`GFP_ATOMIC` - using this value ensures that the + :c:func:`kmalloc` function does not suspend the current process. It can be + used anytime. + +The counterpart to the :c:func:`kmalloc` function is :c:func:`kfree`, a function +that receives as argument an area allocated by :c:func:`kmalloc`. This function +does not suspend the current process and can therefore be called from any +context. 
+ +lists +----- + +Because linked lists are often used, the Linux kernel API provides a unified +way of defining and using lists. This involves using a +:c:type:`struct list_head` element in the structure we want to consider as a +list node. The :c:type:`struct list_head` is defined in +:file:`include/linux/list.h` along with all the other functions that manipulate +the lists. The following code shows the definition of +the :c:type:`struct list_head` and the use of an element of this type in another +well-known structure in the Linux kernel: + +.. code-block:: c + + struct list_head { + struct list_head *next, *prev; + }; + + struct task_struct { + ... + struct list_head children; + ... + }; + +The usual routines for working with lists are the following: + + * :c:macro:`LIST_HEAD(name)` is used to declare the sentinel of a list + * :c:func:`INIT_LIST_HEAD(struct list_head *list)` is used to initialize the + sentinel of a list when dynamic allocation is made, by setting the value of + the :c:data:`next` and :c:data:`prev` to list fields. + * :c:func:`list_add(struct list_head *new, struct list_head *head)` adds the + :c:data:`new` element after the :c:data:`head` element. + * :c:func:`list_del(struct list_head *entry)` deletes the item at the + :c:data:`entry` address of the list it belongs to. + * :c:macro:`list_entry(ptr, type, member)` returns the structure with the + type :c:type:`type` that contains the element :c:data:`ptr` from the list, + having the name :c:member:`member` within the structure. + * :c:macro:`list_for_each(pos, head)` iterates over a list using + :c:data:`pos` as a cursor. + * :c:macro:`list_for_each_safe(pos, n, head)` iterates over a list using + :c:data:`pos` as a cursor and :c:data:`n` as a temporary cursor. + This macro is used to delete an item from the list. + +The following code shows how to use these routines: + +.. 
code-block:: c + + #include + #include + + struct pid_list { + pid_t pid; + struct list_head list; + }; + + LIST_HEAD(my_list); + + static int add_pid(pid_t pid) + { + struct pid_list *ple = kmalloc(sizeof *ple, GFP_KERNEL); + + if (!ple) + return -ENOMEM; + + ple->pid = pid; + list_add(&ple->list, &my_list); + + return 0; + } + + static int del_pid(pid_t pid) + { + struct list_head *i, *tmp; + struct pid_list *ple; + + list_for_each_safe(i, tmp, &my_list) { + ple = list_entry(i, struct pid_list, list); + if (ple->pid == pid) { + list_del(i); + kfree(ple); + return 0; + } + } + + return -EINVAL; + } + + static void destroy_list(void) + { + struct list_head *i, *n; + struct pid_list *ple; + + list_for_each_safe(i, n, &my_list) { + ple = list_entry(i, struct pid_list, list); + list_del(i); + kfree(ple); + } + } + +The evolution of the list can be seen in the following figure: + +.. image:: list_evolution.png + :width: 85% + +You see the stack type behavior introduced by the :c:macro:`list_add` macro, +and the use of a sentinel. + +From the above example, it can be noticed that the way to define and use a list +(double-linked) is generic and, at the same time, it does not introduce an +additional overhead. The :c:type:`struct list_head` is used to maintain the +links between the list elements. It can be noticed that iterating over the list +is also done with this structure, and that retrieving a list element can be done +using :c:macro:`list_entry`. This idea of implementing and using a list is not +new, as it has already been described in The Art of Computer Programming by +Donald Knuth in the 1980s. + +Several kernel list functions and macro definitions are presented and explained +in the :file:`include/linux/list.h` header. + +Spinlock +-------- + +:c:type:`spinlock_t` (defined in :file:`linux/spinlock.h`) is the basic type +that implements the spinlock concept in Linux. 
It describes a spinlock, and the +operations associated with a spinlock are :c:func:`spin_lock_init`, +:c:func:`spin_lock`, :c:func:`spin_unlock`. An example of use is given below: + +.. code-block:: c + + #include <linux/spinlock.h> + + DEFINE_SPINLOCK(lock1); + spinlock_t lock2; + + spin_lock_init(&lock2); + + spin_lock(&lock1); + /* critical region */ + spin_unlock(&lock1); + + spin_lock(&lock2); + /* critical region */ + spin_unlock(&lock2); + + +In Linux, you can use reader-writer spinlocks, useful for readers-writers +problems. +These types of locks are identified by :c:type:`rwlock_t`, and the functions +that can work on a reader-writer spinlock are +:c:func:`rwlock_init`, +:c:func:`read_lock`, +:c:func:`write_lock`. +An example of use: + + +.. code-block:: c + + #include <linux/spinlock.h> + + DEFINE_RWLOCK(lock); + + struct pid_list { + pid_t pid; + struct list_head list; + }; + + int have_pid(struct list_head *lh, int pid) + { + struct list_head *i; + void *elem; + + read_lock(&lock); + list_for_each(i, lh) { + struct pid_list *pl = list_entry(i, struct pid_list, list); + if (pl->pid == pid) { + read_unlock(&lock); + return 1; + } + } + read_unlock(&lock); + + return 0; + } + + void add_pid(struct list_head *lh, struct pid_list *pl) + { + write_lock(&lock); + list_add(&pl->list, lh); + write_unlock(&lock); + } + +mutex +----- + +A mutex is a variable of the :c:type:`struct mutex` type (defined in +:file:`linux/mutex.h`). +Functions and macros for working with mutexes are listed below: + +.. code-block:: c + + #include <linux/mutex.h> + + /* functions for mutex initialization */ + void mutex_init(struct mutex *mutex); + DEFINE_MUTEX(name); + + /* functions for mutex acquire */ + void mutex_lock(struct mutex *mutex); + + /* functions for mutex release */ + void mutex_unlock(struct mutex *mutex); + +Operations are similar to classic mutex operations in user-space or spinlock +operations: the mutex is acquired before entering the critical region and it is +released after exiting the critical region. 
Unlike spinlocks, these operations +can only be used in process context. + +.. _atomic-variables: + +Atomic variables +---------------- + +Often, you only need to synchronize access to a simple variable, such as a +counter. For this, an :c:type:`atomic_t` type can be used (defined in +:file:`include/linux/atomic.h`), that holds an integer value. Below are some +operations that can be performed on an :c:type:`atomic_t` variable. + +.. code-block:: c + + #include <linux/atomic.h> + + void atomic_set(atomic_t *v, int i); + int atomic_read(atomic_t *v); + void atomic_add(int i, atomic_t *v); + void atomic_sub(int i, atomic_t *v); + void atomic_inc(atomic_t *v); + void atomic_dec(atomic_t *v); + int atomic_inc_and_test(atomic_t *v); + int atomic_dec_and_test(atomic_t *v); + int atomic_cmpxchg(atomic_t *v, int old, int new); + +Use of atomic variables +*********************** + +A common way of using atomic variables is to store the status of an action +(e.g. a flag). So we can use an atomic variable to mark exclusive actions. For +example, we consider that an atomic variable can have the LOCKED and UNLOCKED +values, and if the respective variable equals LOCKED then a specific function +should return -EBUSY. +Such a usage is shown schematically in the code below: + +.. code-block:: c + + #define LOCKED 0 + #define UNLOCKED 1 + + static atomic_t flag; + + static int my_acquire(void) + { + int initial_flag; + + /* + * Check if flag is UNLOCKED; if so, lock it and do it atomically. + * + * This is the atomic equivalent of + * if (flag == UNLOCKED) + * flag = LOCKED; + * else + * return -EBUSY; + */ + initial_flag = atomic_cmpxchg(&flag, UNLOCKED, LOCKED); + if (initial_flag == LOCKED) { + printk(KERN_ALERT "Already locked.\n"); + return -EBUSY; + } + + /* Do your thing after getting the lock. */ + [...] + } + + static void my_release(void) + { + /* Release flag; mark it as unlocked. */ + atomic_set(&flag, UNLOCKED); + } + + void my_init(void) + { + [...] 
+ /* Atomic variable is initially unlocked. */ + atomic_set(&flag, UNLOCKED); + + [...] + } + + +The above code is the equivalent of using a trylock (such as +:c:func:`pthread_mutex_trylock`). + +We can also use a variable to store the size of a buffer and for atomic +updates of the respective variable. The code below is such an example: + +.. code-block:: c + + static unsigned char buffer[MAX_SIZE]; + static atomic_t size; + + static void add_to_buffer(unsigned char value) + { + buffer[atomic_read(&size)] = value; + atomic_inc(&size); + } + + static unsigned char remove_from_buffer(void) + { + unsigned char value; + + value = buffer[atomic_read(&size)]; + atomic_dec(&size); + + return value; + } + + static void reset_buffer(void) + { + atomic_set(&size, 0); + } + + void my_init(void) + { + [...] + /* Initialized buffer and size. */ + atomic_set(&size, 0); + memset(buffer, 0, sizeof(buffer)); + + [...] + } + +Atomic bitwise operations +------------------------- + +The kernel provides a set of functions (in :file:`asm/bitops.h`) that modify or +test bits in an atomic way. + +.. code-block:: c + + #include <asm/bitops.h> + + void set_bit(int nr, void *addr); + void clear_bit(int nr, void *addr); + void change_bit(int nr, void *addr); + int test_and_set_bit(int nr, void *addr); + int test_and_clear_bit(int nr, void *addr); + int test_and_change_bit(int nr, void *addr); + +:c:data:`addr` represents the address of the memory area whose bits are being +modified or tested and :c:data:`nr` is the bit on which the operation is +performed. + +Exercises +========= + +.. include:: exercises-summary.hrst +.. |LAB_NAME| replace:: kernel_api + +0. Intro +-------- + +Using |LXR|_ find the definitions of the following symbols in the Linux kernel: + + * :c:type:`struct list_head` + * :c:func:`INIT_LIST_HEAD` + * :c:func:`list_add` + * :c:macro:`list_for_each` + * :c:macro:`list_entry` + * :c:macro:`container_of` + * :c:macro:`offsetof` + +1. 
Memory allocation in Linux kernel +------------------------------------ + +Generate the skeleton for the task named **1-mem** and browse the +contents of the :file:`mem.c` file. Observe the use of :c:func:`kmalloc` +call for memory allocation. + + 1. Compile the source code and load the :file:`mem.ko` module using + :command:`insmod`. + 2. View the kernel messages using the :command:`dmesg` command. + 3. Unload the kernel module using the :command:`rmmod mem` command. + +.. note:: Review the `Memory Allocation`_ section in the lab. + +2. Sleeping in atomic context +----------------------------- + +Generate the skeleton for the task named **2-sched-spin** and browse +the contents of the :file:`sched-spin.c` file. + + 1. Compile the source code and load the module, according to the above info: + (:command:`make build` and :command:`make copy`) + 2. Notice that it waits for 5 seconds until the insertion + command completes. + 3. Unload the kernel module. + 4. Look for the lines marked with: ``TODO 0`` to create an atomic + section. Re-compile the source code and reload the module into + the kernel. + +You should now get an error. Look at the stack trace. What is the +cause of the error? + +.. hint:: In the error message, follow the line containing the :c:macro:`BUG` + for a description of the error. You are not allowed to sleep in + atomic context. The atomic context is given by a section + between a lock operation and an unlock on a spinlock. + +.. note:: The + :c:func:`schedule_timeout` function, combined with the + :c:macro:`set_current_state` macro, forces the current process to wait + for 5 seconds. + +.. note:: Review the `Contexts of execution`_, `Locking`_ and `Spinlock`_ + sections. + +3. Working with kernel memory +----------------------------- + +Generate the skeleton for the task named **3-memory** and +browse the contents of the :file:`memory.c` file. Notice the comments +marked with ``TODO``. 
You must allocate 4 structures of type :c:type:`struct +task_info` and initialize them (in :c:func:`memory_init`), then print and +free them (in :c:func:`memory_exit`). + + 1. (TODO 1) Allocate memory for :c:type:`struct task_info` structure and + initialize its fields: + + * The :c:member:`pid` field to the PID transmitted as a parameter; + * The :c:member:`timestamp` field to the value of the :c:data:`jiffies` + variable, which holds the number of ticks that have occurred since the + system booted. + + 2. (TODO 2) Allocate :c:type:`struct task_info` for the current process, + the parent process, the next process, the next process of the next + process, with the following information: + + * PID of the current process, which can be retrieved from + :c:type:`struct task_struct` structure, returned by :c:macro:`current` + macro. + + .. hint:: + Search for :c:type:`pid` in :c:type:`task_struct`. + + * PID of the parent process of the current process. + + .. hint:: + Search for the relevant field from :c:type:`struct task_struct` + structure. Look for "parent". + + * PID of the next process from the list of processes, relative to the + current process. + + .. hint:: + Use :c:macro:`next_task` macro, which returns a pointer to the next + process (i.e. a :c:type:`struct task_struct` structure). + + * PID of the next process of the next process, relative to the current + process. + + .. hint:: + Call the :c:macro:`next_task` macro 2 times. + + 3. (TODO 3) Display the four structures. + + * Use :c:func:`printk` to display their two fields: + :c:member:`pid` and :c:member:`timestamp`. + + 4. (TODO 4) Release the memory occupied by the structures + (use :c:func:`kfree`). + +.. hint:: + * You can access the current process using :c:macro:`current` + macro. + * Look for the relevant fields in the :c:type:`struct task_struct` + structure (:c:member:`pid`, :c:member:`parent`). + * Use the :c:macro:`next_task` macro. The macro returns the pointer to + the next process (i.e. 
a :c:type:`struct task_struct*` structure). + +.. note:: The :c:type:`struct task_struct` structure contains two fields to + designate the parent of a task: + + * :c:member:`real_parent` points to the process that created the + task or to the process with pid 1 (init) if the parent + completed its execution. + * :c:member:`parent` points to the current parent of the task (the + process that will be reported if the task completes + execution). + + In general, the values of the two fields are the same, but + there are situations where they differ, for example when + using the :c:func:`ptrace` system call. + +.. hint:: Review the `Memory allocation`_ section in the lab. + + +4. Working with kernel lists +---------------------------- + +Generate the skeleton for the task named **4-list**. Browse the +contents of the :file:`list.c` file and notice the comments marked with +``TODO``. The current process will add the four structures from the +previous exercise into a list. The list will be built in the +:c:func:`task_info_add_for_current` function which is called when the module is +loaded. The list will be printed and deleted in the :c:func:`list_exit` +function and the :c:func:`task_info_purge_list` function. + + 1. (TODO 1) Complete the :c:func:`task_info_add_to_list` function to allocate + a :c:type:`struct task_info` structure and add it to the list. + + 2. (TODO 2) Complete the :c:func:`task_info_purge_list` function to delete + all the elements in the list. + + 3. Compile the kernel module. Load and unload the module by + following the messages displayed by the kernel. + +.. hint:: Review the labs `Lists`_ section. When deleting items from + the list, you will need to use either the + :c:macro:`list_for_each_safe` or :c:macro:`list_for_each_entry_safe` + macros. + +5. Working with kernel lists for process handling +------------------------------------------------- + +Generate the skeleton for the task named **5-list-full**. 
Browse the +contents of the :file:`list-full.c` and notice comments marked with +``TODO``. In addition to the :file:`4-list` functionality we add the +following: + + * A :c:member:`count` field showing how many times a process has been "added" + to the list. + * If a process is "added" several times, no new entry is created in + the list, but: + + * Update the :c:member:`timestamp` field. + * Increment :c:member:`count`. + + * To implement the counter facility, add a :c:func:`task_info_find_pid` + function that searches for a pid in the existing list. + + * If found, return the reference to the :c:type:`task_info` struct. If + not, return :c:macro:`NULL`. + + * An expiration facility. If a process was added more than 3 + seconds ago and if it does not have a :c:member:`count` greater than 5 then + it is considered expired and is removed from the list. + * The expiration facility is already implemented in the + :c:func:`task_info_remove_expired` function. + + 1. (TODO 1) Implement the :c:func:`task_info_find_pid` function. + 2. (TODO 2) Change a field of an item in the list so it does not + expire. It must not satisfy a part of the expiration condition + from :c:func:`task_info_remove_expired`. + + .. hint:: For ``TODO 2``, extract the first element from the list (the one + referred by :c:member:`head.next`) and set the :c:member:`count` + field to a large enough value. Use :c:func:`atomic_set` function. + + 3. Compile, copy, load and unload the kernel module following the displayed + messages. + Kernel module loading will take some time, because :c:func:`sleep` is + being called by :c:func:`schedule_timeout` function. + +6. Synchronizing list work +-------------------------- + +Generate the skeleton for the task named **6-list-sync**. + + 1. Browse the code and look for ``TODO 1`` string. + 2. Use a spinlock or a read-write lock to synchronize access to the + list. + 3. Compile, load and unload the kernel module. + +.. important:: Always lock data, not code! + +.. 
note:: Read the `Spinlock`_ section of the lab. + +7. Test module calling in our list module +----------------------------------------- + +Generate the skeleton for the task named **7-list-test** and browse +the contents of the :file:`list-test.c` file. We'll use it as a test +module. It will call functions exported by the **6-list-sync** +task. The exported functions are the ones marked with **extern** in +:file:`list-test.c` file. + +To export the above functions from the module located at :file:`6-list-sync/` +directory, the following steps are required: + + 1. Functions must not be static. + 2. Use the :c:macro:`EXPORT_SYMBOL` macro to export the kernel symbols. For + example: :c:macro:`EXPORT_SYMBOL(task_info_remove_expired);`. The + macro must be used for each function after the function is defined. + Browse the code and look for the ``TODO 2`` string in the + :file:`list-sync.c`. + 3. Remove from the **6-list-sync** module the code that avoids the + expiration of a list item (it is in contradiction to our exercise). + 4. Compile and load the module from :file:`6-list-sync/`. Once loaded, it + exposes exported functions and can be used by the test + module. You can check this by searching for the function names + in :file:`/proc/kallsyms` before and after loading the module. + 5. Compile the test module and then load it. + 6. Use :command:`lsmod` to check that the two modules have been loaded. + What do you notice? + 7. Unload the kernel test module. + +What should be the unload order of the two modules (the module from +**6-list-sync** and the test module)? What happens if you use another order? 
diff --git a/Documentation/teaching/labs/kernel_modules.rst b/Documentation/teaching/labs/kernel_modules.rst new file mode 100644 index 00000000000000..c1b0dd0694e8b7 --- /dev/null +++ b/Documentation/teaching/labs/kernel_modules.rst @@ -0,0 +1,1270 @@ +============== +Kernel modules +============== + +Lab objectives +============== + +* creating simple modules +* describing the process of kernel module compilation +* presenting how a module can be used with a kernel +* simple kernel debugging methods + +Overview +======== + +A monolithic kernel, though faster than a microkernel, has the disadvantage of +lack of modularity and extensibility. On modern monolithic kernels, this has +been solved by using kernel modules. A kernel module (or loadable kernel module) +is an object file that contains code that can extend the kernel functionality +at runtime (it is loaded as needed). When a kernel module is no longer needed, +it can be unloaded. Most of the device drivers are used in the form of kernel +modules. + +For the development of Linux device drivers, it is recommended to download the +kernel sources, configure and compile them and then install the compiled version +on the test/development machine. + +An example of a kernel module +============================= + +Below is a very simple example of a kernel module. When loading into the kernel, +it will generate the message :code:`"Hi"`. When unloading the kernel module, the +:code:`"Bye"` message will be generated. + +.. 
code-block:: c + + #include <linux/kernel.h> + #include <linux/init.h> + #include <linux/module.h> + + MODULE_DESCRIPTION("My kernel module"); + MODULE_AUTHOR("Me"); + MODULE_LICENSE("GPL"); + + static int dummy_init(void) + { + pr_debug("Hi\n"); + return 0; + } + + static void dummy_exit(void) + { + pr_debug("Bye\n"); + } + + module_init(dummy_init); + module_exit(dummy_exit); + + +The generated messages will not be displayed on the console but will be saved +in a specially reserved memory area for this, from where they will be extracted +by the logging daemon (syslog). To display kernel messages, you can use the +:command:`dmesg` command or inspect the logs: + +.. code-block:: bash + + # cat /var/log/syslog | tail -2 + Feb 20 13:57:38 asgard kernel: Hi + Feb 20 13:57:43 asgard kernel: Bye + + # dmesg | tail -2 + Hi + Bye + +Compiling kernel modules +======================== + +Compiling a kernel module differs from compiling a user program. First, other +headers should be used. Also, the module should not be linked to libraries. +And, last but not least, the module must be compiled with the same options as +the kernel in which we load the module. For these reasons, there is a standard +compilation method (:code:`kbuild`). This method requires the use of two files: +a :file:`Makefile` and a :file:`Kbuild` file. + +Below is an example of a :file:`Makefile`: + +.. code-block:: bash + + KDIR = /lib/modules/`uname -r`/build + + kbuild: + make -C $(KDIR) M=`pwd` + + clean: + make -C $(KDIR) M=`pwd` clean + +And the example of a :file:`Kbuild` file used to compile a module: + +.. code-block:: bash + + EXTRA_CFLAGS = -Wall -g + + obj-m = modul.o + + +As you can see, calling :command:`make` on the :file:`Makefile` file in the +example shown will result in the :command:`make` invocation in the kernel +source directory (``/lib/modules/`uname -r`/build``) and referring to the +current directory (``M = `pwd```). 
This process ultimately leads to reading +the :file:`Kbuild` file from the current directory and compiling the module +as instructed in this file. + +.. note:: For labs we will configure different :command:`KDIR`, according to + the virtual machine specifications: + + .. code-block:: bash + + KDIR = /home/student/src/linux + [...] + +A :file:`Kbuild` file contains one or more directives for compiling a kernel +module. The easiest example of such a directive is ``obj-m = +module.o``. Following this directive, a kernel module (:code:`ko` - kernel +object) will be created, starting from the ``module.o`` file. ``module.o`` will +be created starting from ``module.c`` or ``module.S``. All of these files can +be found in the :file:`Kbuild`'s directory. + +An example of a :file:`Kbuild` file that uses several sub-modules is shown +below: + +.. code-block:: bash + + EXTRA_CFLAGS = -Wall -g + + obj-m = supermodule.o + supermodule-y = module-a.o module-b.o + +For the example above, the steps to compile are: + + * compile the :file:`module-a.c` and :file:`module-b.c` sources, + resulting in module-a.o and module-b.o objects + * :file:`module-a.o` and :file:`module-b.o` will then be linked + in :file:`supermodule.o` + * from :file:`supermodule.o` will be created :file:`supermodule.ko` + module + + +The suffix of targets in :file:`Kbuild` determines how they are used, as +follows: + + * M (modules) is a target for loadable kernel modules + + * Y (yes) represents a target for object files to be compiled and then + linked to a module (``$(module_name)-y``) or within the kernel (``obj-y``) + + * any other target suffix will be ignored by :file:`Kbuild` and will not be + compiled + + +.. note:: These suffixes are used to easily configure the kernel by running the + :command:`make menuconfig` command or directly editing the + :file:`.config` file. This file sets a series of variables that are + used to determine which features are added to the kernel at build + time. 
For example, when adding BTRFS support with :command:`make + menuconfig`, the line :code:`CONFIG_BTRFS_FS = y` is added to the + :file:`.config` file. The BTRFS kbuild contains the line + ``obj-$(CONFIG_BTRFS_FS):= btrfs.o``, which becomes ``obj-y:= + btrfs.o``. This will compile the :file:`btrfs.o` object, which will be + linked to the kernel. Before the variable was set, the line became + ``obj-:= btrfs.o`` and so it was ignored, and the kernel was built + without BTRFS support. + +For more details, see the :file:`Documentation/kbuild/makefiles.txt` and +:file:`Documentation/kbuild/modules.txt` files within the kernel sources. + + +Loading/unloading a kernel module +================================= + +To load a kernel module, use the :command:`insmod` utility. This utility +receives as a parameter the path to the :file:`*.ko` file in which the module +was compiled and linked. Unloading the module from the kernel is done using +the :command:`rmmod` command, which receives the module name as a parameter. + +.. code-block:: bash + + $ insmod module.ko + $ rmmod module.ko + +When loading the kernel module, the routine specified as a parameter of the +``module_init`` macro will be executed. Similarly, when the module is unloaded +the routine specified as a parameter of the ``module_exit`` will be executed. + +A complete example of compiling and loading/unloading a kernel module is +presented below: + +.. code-block:: bash + + faust:~/lab-01/modul-lin# ls + Kbuild Makefile modul.c + + faust:~/lab-01/modul-lin# make + make -C /lib/modules/`uname -r`/build M=`pwd` + make[1]: Entering directory `/usr/src/linux-2.6.28.4' + LD /root/lab-01/modul-lin/built-in.o + CC [M] /root/lab-01/modul-lin/modul.o + Building modules, stage 2. 
+ MODPOST 1 modules + CC /root/lab-01/modul-lin/modul.mod.o + LD [M] /root/lab-01/modul-lin/modul.ko + make[1]: Leaving directory `/usr/src/linux-2.6.28.4' + + faust:~/lab-01/modul-lin# ls + built-in.o Kbuild Makefile modul.c Module.markers + modules.order Module.symvers modul.ko modul.mod.c + modul.mod.o modul.o + + faust:~/lab-01/modul-lin# insmod modul.ko + + faust:~/lab-01/modul-lin# dmesg | tail -1 + Hi + + faust:~/lab-01/modul-lin# rmmod modul + + faust:~/lab-01/modul-lin# dmesg | tail -2 + Hi + Bye + +Information about modules loaded into the kernel can be found using the +:command:`lsmod` command or by inspecting the :file:`/proc/modules`, +:file:`/sys/module` directories. + +Debugging +========= + +Troubleshooting a kernel module is much more complicated than debugging a +regular program. First, a mistake in a kernel module can lead to blocking the +entire system. Troubleshooting is therefore much slowed down. To avoid reboot, +it is recommended to use a virtual machine (qemu, virtualbox, vmware). + +When a module containing bugs is inserted into the kernel, it will eventually +generate a `kernel oops `_. +A kernel oops is an invalid operation detected by the kernel and can only +be generated by the kernel. For a stable kernel version, it almost certainly +means that the module contains a bug. After the oops appears, the kernel will +continue to work. + +When a kernel oops appears, it is very important to save the generated +message. As noted above, messages generated by the kernel are saved in logs and +can be displayed with the :command:`dmesg` command. To make sure that no kernel +message is lost, it is recommended to insert/test the kernel module directly +from the console, or periodically check the kernel messages. Noteworthy is that +an oops can occur because of a programming error, but also because of a +hardware error. + +If a fatal error occurs, after which the system can not return to a stable +state, a `kernel panic `_ is +generated. 
+ +Look at the kernel module below that contains a bug that generates an oops: + +.. code-block:: c + + /* + * Oops generating kernel module + */ + + #include + #include + #include + + MODULE_DESCRIPTION ("Oops"); + MODULE_LICENSE ("GPL"); + MODULE_AUTHOR ("PSO"); + + #define OP_READ 0 + #define OP_WRITE 1 + #define OP_OOPS OP_WRITE + + static int my_oops_init (void) + { + int *a; + + a = (int *) 0x00001234; + #if OP_OOPS == OP_WRITE + *a = 3; + #elif OP_OOPS == OP_READ + printk (KERN_ALERT "value = %d\n", *a); + #else + #error "Unknown op for oops!" + #endif + + return 0; + } + + static void my_oops_exit (void) + { + } + + module_init (my_oops_init); + module_exit (my_oops_exit); + +.. ** + +Inserting this module into the kernel will generate an oops: + +.. code-block:: bash + + faust:~/lab-01/modul-oops# insmod oops.ko + [...] + + faust:~/lab-01/modul-oops# dmesg | tail -32 + BUG: unable to handle kernel paging request at 00001234 + IP: [] my_oops_init+0x5/0x20 [oops] + *de = 00000000 + Oops: 0002 [#1] PREEMPT DEBUG_PAGEALLOC + last sysfs file: /sys/devices/virtual/net/lo/operstate + Modules linked in: oops(+) netconsole ide_cd_mod pcnet32 crc32 cdrom [last unloaded: modul] + + Pid: 4157, comm: insmod Not tainted (2.6.28.4 #2) VMware Virtual Platform + EIP: 0060:[] EFLAGS: 00010246 CPU: 0 + EIP is at my_oops_init+0x5/0x20 [oops] + EAX: 00000000 EBX: fffffffc ECX: c89d4300 EDX: 00000001 + ESI: c89d4000 EDI: 00000000 EBP: c5799e24 ESP: c5799e24 + DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 + Process insmod (pid: 4157, ti=c5799000 task=c665c780 task.ti=c5799000) + Stack: + c5799f8c c010102d c72b51d8 0000000c c5799e58 c01708e4 00000124 00000000 + c89d4300 c5799e58 c724f448 00000001 c89d4300 c5799e60 c0170981 c5799f8c + c014b698 00000000 00000000 c5799f78 c5799f20 00000500 c665cb00 c89d4300 + Call Trace: + [] ? _stext+0x2d/0x170 + [] ? __vunmap+0xa4/0xf0 + [] ? vfree+0x21/0x30 + [] ? load_module+0x19b8/0x1a40 + [] ? __mutex_unlock_slowpath+0xd5/0x140 + [] ? 
trace_hardirqs_on_caller+0x106/0x150 + [] ? sys_init_module+0x8a/0x1b0 + [] ? trace_hardirqs_on_caller+0x106/0x150 + [] ? trace_hardirqs_on_thunk+0xc/0x10 + [] ? sysenter_do_call+0x12/0x43 + Code: 05 34 12 00 00 03 00 00 00 5d c3 eb 0d 90 90 90 90 90 90 90 90 + EIP: [] my_oops_init+0x5/0x20 [oops] SS:ESP 0068:c5799e24 + ---[ end trace 2981ce73ae801363 ]--- + +Although relatively cryptic, the message provided by the kernel to the +appearance of an oops provides valuable information about the error. First line: + +.. code-block:: bash + + BUG: unable to handle kernel paging request at 00001234 + EIP: [] my_oops_init + 0x5 / 0x20 [oops] + +Tells us the cause and the address of the instruction that generated the error. +In our case this is an invalid access to memory. + +Next line + + ``Oops: 0002 [# 1] PREEMPT DEBUG_PAGEALLOC`` + +Tells us that it's the first oops (#1). This is important in the context that +an oops can lead to other oopses. Usually only the first oops is relevant. +Furthermore, the oops code (``0002``) provides information about the error type +(see :file:`arch/x86/include/asm/traps.h`): + + + * Bit 0 == 0 means no page found, 1 means protection fault + * Bit 1 == 0 means read, 1 means write + * Bit 2 == 0 means kernel, 1 means user mode + +In this case, we have a write access that generated the oops (bit 1 is 1). + +Below is a dump of the registers. It decodes the instruction pointer (``EIP``) +value and notes that the bug appeared in the :code:`my_oops_init` function with +a 5-byte offset (``EIP: [] my_oops_init+0x5``). The message also +shows the stack content and a backtrace of calls until then. + +If an invalid read call is generated (``#define OP_OOPS OP_READ``), the message +will be the same, but the oops code will differ, which would now be ``0000``: + +.. 
code-block:: bash + + faust:~/lab-01/modul-oops# dmesg | tail -33 + BUG: unable to handle kernel paging request at 00001234 + IP: [] my_oops_init+0x6/0x20 [oops] + *de = 00000000 + Oops: 0000 [#1] PREEMPT DEBUG_PAGEALLOC + last sysfs file: /sys/devices/virtual/net/lo/operstate + Modules linked in: oops(+) netconsole pcnet32 crc32 ide_cd_mod cdrom + + Pid: 2754, comm: insmod Not tainted (2.6.28.4 #2) VMware Virtual Platform + EIP: 0060:[] EFLAGS: 00010292 CPU: 0 + EIP is at my_oops_init+0x6/0x20 [oops] + EAX: 00000000 EBX: fffffffc ECX: c89c3380 EDX: 00000001 + ESI: c89c3010 EDI: 00000000 EBP: c57cbe24 ESP: c57cbe1c + DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 + Process insmod (pid: 2754, ti=c57cb000 task=c66ec780 task.ti=c57cb000) + Stack: + c57cbe34 00000282 c57cbf8c c010102d c57b9280 0000000c c57cbe58 c01708e4 + 00000124 00000000 c89c3380 c57cbe58 c5db1d38 00000001 c89c3380 c57cbe60 + c0170981 c57cbf8c c014b698 00000000 00000000 c57cbf78 c57cbf20 00000580 + Call Trace: + [] ? _stext+0x2d/0x170 + [] ? __vunmap+0xa4/0xf0 + [] ? vfree+0x21/0x30 + [] ? load_module+0x19b8/0x1a40 + [] ? printk+0x0/0x1a + [] ? __mutex_unlock_slowpath+0xd5/0x140 + [] ? trace_hardirqs_on_caller+0x106/0x150 + [] ? sys_init_module+0x8a/0x1b0 + [] ? trace_hardirqs_on_caller+0x106/0x150 + [] ? trace_hardirqs_on_thunk+0xc/0x10 + [] ? sysenter_do_call+0x12/0x43 + Code: 34 12 00 00 c7 04 24 54 30 9c c8 89 44 24 04 e8 58 a0 99 f7 31 + EIP: [] my_oops_init+0x6/0x20 [oops] SS:ESP 0068:c57cbe1c + ---[ end trace 45eeb3d6ea8ff1ed ]--- + +objdump +------- + +Detailed information about the instruction that generated the oops can be found +using the :command:`objdump` utility. Useful options to use are :command:`-d` +to disassemble the code and :command:`-S` for interleaving C code in assembly +language code. For efficient decoding, however, we need the address where the +kernel module was loaded. This can be found in :file:`/proc/modules`. 
+ +Here's an example of using :command:`objdump` on the above module to identify +the instruction that generated the oops: + +.. code-block:: bash + + faust:~/lab-01/modul-oops# cat /proc/modules + oops 1280 1 - Loading 0xc89d4000 + netconsole 8352 0 - Live 0xc89ad000 + pcnet32 33412 0 - Live 0xc895a000 + ide_cd_mod 34952 0 - Live 0xc8903000 + crc32 4224 1 pcnet32, Live 0xc888a000 + cdrom 34848 1 ide_cd_mod, Live 0xc886d000 + + faust:~/lab-01/modul-oops# objdump -dS --adjust-vma=0xc89d4000 oops.ko + + oops.ko: file format elf32-i386 + + + Disassembly of section .text: + + c89d4000 : + #define OP_READ 0 + #define OP_WRITE 1 + #define OP_OOPS OP_WRITE + + static int my_oops_init (void) + { + c89d4000: 55 push %ebp + #else + #error "Unknown op for oops!" + #endif + + return 0; + } + c89d4001: 31 c0 xor %eax,%eax + #define OP_READ 0 + #define OP_WRITE 1 + #define OP_OOPS OP_WRITE + + static int my_oops_init (void) + { + c89d4003: 89 e5 mov %esp,%ebp + int *a; + + a = (int *) 0x00001234; + #if OP_OOPS == OP_WRITE + *a = 3; + c89d4005: c7 05 34 12 00 00 03 movl $0x3,0x1234 + c89d400c: 00 00 00 + #else + #error "Unknown op for oops!" + #endif + + return 0; + } + c89d400f: 5d pop %ebp + c89d4010: c3 ret + c89d4011: eb 0d jmp c89c3020 + c89d4013: 90 nop + c89d4014: 90 nop + c89d4015: 90 nop + c89d4016: 90 nop + c89d4017: 90 nop + c89d4018: 90 nop + c89d4019: 90 nop + c89d401a: 90 nop + c89d401b: 90 nop + c89d401c: 90 nop + c89d401d: 90 nop + c89d401e: 90 nop + c89d401f: 90 nop + + c89d4020 : + + static void my_oops_exit (void) + { + c89d4020: 55 push %ebp + c89d4021: 89 e5 mov %esp,%ebp + } + c89d4023: 5d pop %ebp + c89d4024: c3 ret + c89d4025: 90 nop + c89d4026: 90 nop + c89d4027: 90 nop + +Note that the instruction that generated the oops (``c89d4005`` identified +earlier) is: + + ``C89d4005: c7 05 34 12 00 00 03 movl $ 0x3,0x1234`` + +That is exactly what was expected - storing value 3 at 0x0001234. 
+ +The :file:`/proc/modules` is used to find the address where a kernel module is +loaded. The :command:`--adjust-vma` option allows you to display instructions +relative to ``0xc89d4000``. The :command:`-l` option displays the number of +each line in the source code interleaved with the assembly language code. + +addr2line +--------- + +A simpler way to find the code that generated an oops is to use the +:command:`addr2line` utility: + +.. code-block:: bash + + faust:~/lab-01/modul-oops# addr2line -e oops.o 0x5 + /root/lab-01/modul-oops/oops.c:23 + +Where ``0x5`` is the value of the program counter (``EIP = c89d4005``) that +generated the oops, minus the base address of the module (``0xc89d4000``) +according to :file:`/proc/modules`. + +minicom +------- + +:command:`Minicom` (or other equivalent utilities, e.g. :command:`picocom`, +:command:`screen`) is a utility that can be used to connect and interact with a +serial port. The serial port is the basic method for analyzing kernel messages +or interacting with an embedded system in the development phase. There are two +common ways to connect: + +* a serial port where the device we are going to use is :file:`/dev/ttyS0` + +* a serial USB port (FTDI) in which case the device we are going to use is + :file:`/dev/ttyUSB`. + +For the virtual machine used in the lab, the device that we need to use is +displayed after the virtual machine starts: + +.. code-block:: bash + + char device redirected to /dev/pts/20 (label virtiocon0) + +Minicom use: + +.. code-block:: bash + + #for connecting via COM1 and using a speed of 115,200 bits per second (baud) + minicom -b 115200 -D /dev/ttyS0 + + #For USB serial port connection + minicom -D /dev/ttyUSB0 + + #To connect to the serial port of the virtual machine + minicom -D /dev/pts/20 + +netconsole +---------- + +:command:`Netconsole` is a utility that allows logging of kernel debugging +messages over the network.
This is useful when the disk logging system does not +work or when serial ports are not available or when the terminal does not +respond to commands. :command:`Netconsole` comes in the form of a kernel +module. + +To work, it needs the following parameters: + + * port, IP address, and the source interface name of the debug station + * port, MAC address, and IP address of the machine to which the debug + messages will be sent + +These parameters can be configured when the module is inserted into the kernel, +or even while the module is loaded if it has been compiled with the +``CONFIG_NETCONSOLE_DYNAMIC`` option. + +An example configuration when inserting the :command:`netconsole` kernel module is +as follows: + +.. code-block:: bash + + alice:~# modprobe netconsole netconsole=6666@192.168.191.130/eth0,6000@192.168.191.1/00:50:56:c0:00:08 + +Thus, the debug messages on the station that has the address +``192.168.191.130`` will be sent through the ``eth0`` interface, having source port +``6666``. The messages will be sent to ``192.168.191.1`` with the MAC address +``00:50:56:c0:00:08``, on port ``6000``. + +Messages can be displayed on the destination station using :command:`netcat`: + +.. code-block:: bash + + bob:~ # nc -l -p 6000 -u + +Alternatively, the destination station can configure :command:`syslogd` to +intercept these messages. More information can be found in +:file:`Documentation/networking/netconsole.txt`. + +Printk debugging +---------------- + +``The two oldest and most useful debugging aids are Your Brain and Printf``. + +For debugging, a primitive but quite effective method is often used: +:code:`printk` debugging. Although a debugger can also be used, it is generally +not very useful: simple bugs (uninitialized variables, memory management +problems, etc.) can be easily localized by control messages and the +kernel-decoded oops message.
+ +For more complex bugs, even a debugger cannot help us too much unless the +operating system structure is very well understood. When debugging a kernel +module, there are a lot of unknowns in the equation: multiple contexts (we have +multiple processes and threads running at a time), interrupts, virtual +memory, etc. + +You can use :code:`printk` to display kernel messages to user space. It is +similar to :code:`printf`'s functionality; the only difference is that the +transmitted message can be prefixed with a string of :code:`"<n>"`, where +:code:`n` indicates the error level (loglevel) and has values between ``0`` and +``7``. Instead of :code:`"<n>"`, the levels can also be coded by symbolic +constants: + +.. code-block:: c + + KERN_EMERG - n = 0 + KERN_ALERT - n = 1 + KERN_CRIT - n = 2 + KERN_ERR - n = 3 + KERN_WARNING - n = 4 + KERN_NOTICE - n = 5 + KERN_INFO - n = 6 + KERN_DEBUG - n = 7 + + +The definitions of all log levels are found in :file:`linux/kern_levels.h`. +Basically, these log levels are used by the system to route messages sent to +various outputs: console, log files in :file:`/var/log` etc. + +.. note:: To display :code:`printk` messages in user space, the :code:`printk` + log level must be of higher priority than the :code:`console_loglevel` + variable. The default console log level can be configured from + :file:`/proc/sys/kernel/printk`. + + For instance, the command: + + .. code-block:: bash + + echo 8 > /proc/sys/kernel/printk + + will enable all the kernel log messages to be displayed in the + console. That is, the logging level has to be strictly less than the + :code:`console_loglevel` variable. For example, if the + :code:`console_loglevel` has a value of ``5`` (specific to + :code:`KERN_NOTICE`), only messages with a loglevel strictly less than ``5`` + (i.e. :code:`KERN_EMERG`, :code:`KERN_ALERT`, :code:`KERN_CRIT`, + :code:`KERN_ERR`, :code:`KERN_WARNING`) will be shown.
+ +Console-redirected messages can be useful for quickly viewing the effect of +executing the kernel code, but they are no longer so useful if the kernel +encounters an irreparable error and the system freezes. In this case, the logs +of the system must be consulted, as they keep the information between system +restarts. These are found in :file:`/var/log` and are text files, populated by +:code:`syslogd` and :code:`klogd` during the kernel run. :code:`syslogd` and +:code:`klogd` take the information from the virtual file system mounted in +:file:`/proc`. In principle, with :code:`syslogd` and :code:`klogd` turned on, +all messages coming from the kernel will go to :file:`/var/log/kern.log`. + +A simpler version for debugging is using the :file:`/var/log/debug` file. It +is populated only with the :code:`printk` messages from the kernel with the +:code:`KERN_DEBUG` log level. + +Given that a production kernel (similar to the one we're probably running with) +contains only release code, our module is among the few that send messages +prefixed with KERN_DEBUG . In this way, we can easily navigate through the +:file:`/var/log/debug` information by finding the messages corresponding to a +debugging session for our module. + +Such an example would be the following: + +.. code-block:: bash + + # Clear the debug file of previous information (or possibly a backup) + $ echo "New debug session" > /var/log/debug + # Run the tests + # If there is no critical error causing a panic kernel, check the output + # if a critical error occurs and the machine only responds to a restart, + restart the system and check /var/log/debug. + +The format of the messages must obviously contain all the information of +interest in order to detect the error, but inserting in the code :code:`printk` +to provide detailed information can be as time-consuming as writing the code to +solve the problem. 
This is usually a trade-off between the completeness of the +debugging messages displayed using :code:`printk` and the time it takes to +insert these messages into the code. + +A very simple way, less time-consuming for inserting :code:`printk` and +providing the possibility to analyze the flow of instructions for tests is the +use of the predefined constants :code:`__FILE__`, :code:`__LINE__` and +:code:`__func__`: + + * ``__FILE__`` is replaced by the compiler with the name of the source file + that is currently being compiled. + + * ``__LINE__`` is replaced by the compiler with the line number on which the + current instruction is found in the current source file. + + * ``__func__`` / ``__FUNCTION__`` is replaced by the compiler with the name + of the function in which the current instruction is found. + +.. note:: + :code:`__FILE__` and :code:`__LINE__` are part of the ANSI C specification; + :code:`__func__` is part of the C99 specification; :code:`__FUNCTION__` is a GNU + :code:`C` extension and is not portable. However, since we write code for the + :code:`Linux` kernel, we can use it without any problems. + +The following macro definition can be used in this case: + +.. code-block:: c + + #define PRINT_DEBUG \ + printk (KERN_DEBUG "[%s]: FUNC:%s: LINE:%d\n", __FILE__, \ + __FUNCTION__, __LINE__) + +Then, at each point where we want to see if it is "reached" in execution, +insert ``PRINT_DEBUG;``. This is a simple and quick method, and it can yield results if the +output is carefully analyzed. + +The :command:`dmesg` command is used to view the messages printed with +:code:`printk` but not appearing on the console. + +To delete all previous messages from a log file, run: + +.. code-block:: bash + + cat /dev/null > /var/log/debug + +To delete messages displayed by the :command:`dmesg` command, run: + +.. code-block:: bash + + dmesg -c + + +Dynamic debugging +----------------- + +Dynamic `dyndbg <https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html>`_ +debugging enables dynamic debugging activation/deactivation.
+Unlike :code:`printk`, it offers more advanced :code:`printk` options for the +messages we want to display; it is very useful for complex modules or +troubleshooting subsystems. +This significantly reduces the amount of messages displayed, leaving only +those relevant for the debug context. To enable ``dyndbg``, the kernel must be +compiled with the ``CONFIG_DYNAMIC_DEBUG`` option. Once configured, +:code:`pr_debug()`, :code:`dev_dbg()` and :code:`print_hex_dump_debug()`, +:code:`print_hex_dump_bytes()` can be dynamically enabled per call. + +The :file:`/sys/kernel/debug/dynamic_debug/control` file from the debugfs (where +:file:`/sys/kernel/debug` is the path to which debugfs was mounted) is used to +filter messages or to view existing filters. + +.. code-block:: c + + mount -t debugfs none /debug + +`Debugfs `_ +is a simple file system, used as a kernel-space interface and +user-space interface to configure different debug options. Any debug utility +can create and use its own files /folders in debugfs. + +For example, to display existing filters in ``dyndbg``, you will use: + +.. code-block:: bash + + cat /debug/dynamic_debug/control + +And to enable the debug message from line ``1603`` in the :file:`svcsock.c` file: + +.. code-block:: bash + + echo 'file svcsock.c line 1603 +p' > /debug/dynamic_debug/control + +The :file:`/debug/dynamic_debug/control` file is not a regular file. It shows +the ``dyndbg`` settings on the filters. Writing in it with an echo will change +these settings (it will not actually make a write). Be aware that the file +contains settings for ``dyndbg`` debugging messages. Do not log in this file. + +Dyndbg Options +~~~~~~~~~~~~~~ + +* ``func`` - just the debug messages from the functions that have the same + name as the one defined in the filter. + + .. code-block:: bash + + echo 'func svc_tcp_accept +p' > /debug/dynamic_debug/control + +* ``file`` - the name of the file(s) for which we want to display the debug + messages. 
It can be just the source name, but also the absolute path or + kernel-tree path. + + .. code-block:: bash + + file svcsock.c + file kernel/freezer.c + file /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c + +* ``module`` - module name. + + .. code-block:: bash + + module sunrpc + +* ``format`` - only messages whose display format contains the specified string. + + .. code-block:: bash + + format "nfsd: SETATTR" + +* ``line`` - the line or lines for which we want to enable debug calls. + + .. code-block:: bash + + # Triggers debug messages between lines 1603 and 1605 in the svcsock.c file + $ echo 'file svcsock.c line 1603-1605 +p' > /sys/kernel/debug/dynamic_debug/control + # Enables debug messages from the beginning of the file to line 1605 + $ echo 'file svcsock.c line -1605 +p' > /sys/kernel/debug/dynamic_debug/control + +In addition to the above options, a series of flags can be added, removed, or set +with operators ``+``, ``-`` or ``=``: + + * ``p`` activates the :code:`pr_debug()` call. + * ``f`` includes the name of the function in the printed message. + * ``l`` includes the line number in the printed message. + * ``m`` includes the module name in the printed message. + * ``t`` includes the thread id if it is not called from interrupt context + * ``_`` no flag is set. + +KDB: Kernel debugger +-------------------- + +The kernel debugger has proven to be very useful to facilitate the development and +debugging process. One of its main advantages is the possibility to perform live debugging. +This allows us to monitor, in real time, the accesses to memory or even modify the memory +while debugging. +The debugger has been integrated in the mainline kernel starting with version 2.6.26-rc1.
+KDB is not a *source debugger*, but for a complete analysis it can be used in parallel with +gdb and symbol files -- see :ref:`the GDB debugging section ` + +To use KDB, you have the following options: + + * non-usb keyboard + VGA text console + * serial port console + * USB EHCI debug port + +For the lab, we will use a serial interface connected to the host. +The following command will activate KDB over the serial port: + +.. code-block:: bash + + echo hvc0 > /sys/module/kgdboc/parameters/kgdboc + +KDB is a *stop mode debugger*, which means that, while it is active, all the other processes +are stopped. The kernel can be *forced* to enter KDB during execution using the following +`SysRq <https://www.kernel.org/doc/html/latest/admin-guide/sysrq.html>`__ command: + +.. code-block:: bash + + echo g > /proc/sysrq-trigger + +or by using the key combination ``Ctrl+O g`` in a terminal connected to the serial port +(for example using :command:`minicom`). + +KDB has various commands to control and define the context of the debugged system: + + * lsmod, ps, kill, dmesg, env, bt (backtrace) + * dump trace logs + * hardware breakpoints + * modifying memory + +For a better description of the available commands you can use the ``help`` command in +the KDB shell. +The next example shows a simple use of KDB that sets a hardware +breakpoint to monitor the changes of the ``mVar`` variable. + +.. code-block:: bash + + # trigger KDB + echo g > /proc/sysrq-trigger + # or if we are connected to the serial port issue + Ctrl-O g + # breakpoint on write access to the mVar variable + kdb> bph mVar dataw + # return from KDB + kdb> go + +Exercises +========= + +.. _exercises_summary: + +.. include:: exercises-summary.hrst +.. |LAB_NAME| replace:: kernel_modules + +0. Intro +-------- + +Using :command:`cscope` or |LXR|_ find the definitions of the following symbols +in the Linux kernel source code: + +* :c:func:`module_init` and :c:func:`module_exit` + + - What do the two macros do? What are ``init_module`` and ``cleanup_module``?
+ +* :c:data:`ignore_loglevel` + + - What is this variable used for? + +.. warning:: + If you have problems using :command:`cscope`, it is possible that the database + is not generated. To generate it, use the following command in the kernel + directory: + + .. code-block:: bash + + make ARCH=x86 cscope + +.. note:: + When searching for a structure using :command:`cscope`, use only the + structure name (without :code:`struct`). So, to search for the + structure :c:type:`struct module`, you will use the command + + .. code-block:: bash + + vim -t module + + or, in :command:`vim`, the command + + .. code-block:: bash + + :cs f g module + +.. note:: + For more info on using :command:`cscope`, read the + :ref:`cscope section ` in the previous lab. + +1. Kernel module +---------------- + +To work with the kernel modules, we will follow the steps described +:ref:`above `. + +Generate the skeleton for the task named **1-2-test-mod** then build and +copy the module to the VM, by running the following commands in +:file:`tools/labs`. + +.. code-block:: bash + + $ LABS=kernel_modules make skels + $ make build + $ make copy + +These commands will build and copy all the modules in the current +lab skeleton. + +.. warning:: + Until after solving exercise 3, you will get a compilation error for + ``3-error-mod``. To avoid this issue, remove the directory + :file:`skels/kernel_modules/3-error-mod/` and remove the corresponding + line from ``skels/Kbuild``. + +Start the VM using :command:`make boot`, connect to the serial console +using `minicom -D serial.pts` and perform the following tasks: + +* load the kernel module. + +* list the kernel modules and check if current module is present + +* unload the kernel module + +* view the messages displayed at loading/unloading the kernel module using + :command:`dmesg` command + +.. note:: Read `Loading/unloading a kernel module`_ section. When unloading + a kernel module, you can specify only the module name + (without extension). + +2. 
Printk +--------- + +Watch the virtual machine console. Why were the messages displayed directly +to the virtual machine console? + +Configure the system such that the messages are not displayed directly +on the serial console, and they can only be inspected using ``dmesg``. + +.. hint:: One option is to set the console log level by writing + the desired level to ``/proc/sys/kernel/printk``. + Use a value smaller than the level used for the prints in + the source code of the module. + +Load/unload the module again. +The messages should not be printed to the virtual machine console, +but they should be visible when running ``dmesg``. + +3. Error +-------- + +Generate the skeleton for the task named **3-error-mod**. Compile the +sources and get the corresponding kernel module. + +Why have compilation +errors occurred? **Hint:** How does this module differ from the previous module? + +Modify the module to solve the cause of those errors, then compile and test +the module. + +4. Sub-modules +-------------- + +Inspect the C source files ``mod1.c`` and ``mod2.c`` in :file:`4-multi-mod/`. +Module 2 contains only the definition of a function used by module 1. + +Change the :file:`Kbuild` file to create the ``multi_mod.ko`` module from the +two C source files. + +.. hint:: Read the `Compiling kernel modules`_ section of the lab. + +Compile, copy, boot the VM, load and unload the kernel module. Make sure messages +are properly displayed on the console. + +5. Kernel oops +-------------- + +Enter the directory for the task **5-oops-mod** and inspect the +C source file. Notice where the problem will occur. Add the compilation flag +``-g`` in the Kbuild file. + +.. hint:: Read the `Compiling kernel modules`_ section of the lab. + +Compile the corresponding module and load it into the kernel. Identify the memory +address at which the oops appeared. + +.. hint:: Read the `Debugging`_ section of the lab.
To identify the + address, follow the oops message and extract the value of + the instruction pointer (``EIP``) register. + +Determine which instruction has triggered the oops. + +.. hint:: Use the :file:`/proc/modules` information to get the load address of + the kernel module. Use, on the physical machine, objdump + and/or addr2line. Objdump needs debugging support for + compilation! Read the lab's `objdump`_ and `addr2line`_ + sections. + +Try to unload the kernel module. Notice that the operation does not +work because there are references from the kernel module within the +kernel since the oops. Until the release of those references (which is +almost impossible in the case of an oops), the module cannot be +unloaded. + +6. Module parameters +-------------------- + +Enter the directory for the task **6-cmd-mod** and inspect the C +``cmd_mod.c`` source file. Compile and copy the associated module and +load the kernel module to see the printk message. Then unload the +module from the kernel. + +Without modifying the sources, load the kernel module so that the +message shown is ``Early bird gets tired``. + +.. hint:: The str variable can be changed by passing a parameter to + the module. Find more information `here + `_. + +.. _proc-info: + +7. Proc info +------------ + +Check the skeleton for the task named **7-list-proc**. Add code to +display the Process ID (``PID``) and the executable name for the current +process. + +Follow the commands marked with ``TODO``. +The information must be displayed both when loading and unloading the +module. + +.. note:: + * In the Linux kernel, a process is described by the + :c:type:`struct task_struct`. Use |LXR|_ or ``cscope`` to find the + definition of :c:type:`struct task_struct`. + + * To find the structure field that contains the name of the + executable, look for the "executable" comment.
+ + * The pointer to the structure of the current process + running at a given time in the kernel is given by the + :c:macro:`current` variable (of the type + :c:type:`struct task_struct*`). + +.. hint:: To use :c:macro:`current` you'll need to include the header + in which the :c:type:`struct task_struct` is defined, i.e + ``linux/sched.h``. + +Compile, copy, boot the VM and load the module. Unload the kernel module. + +Repeat the loading/unloading operation. Note that the PIDs of the +displayed processes differ. This is because a process is created +from the executable :file:`/sbin/insmod` when the module is loaded and +when the module is unloaded a process is created from the executable +:file:`/sbin/rmmod`. + +Extra Exercises +=============== + +1. KDB +------ + +Go to the **8-kdb** directory. Activate KDB over the serial port and enter KDB +mode using :command:`SysRq`. Connect to the pseudo-terminal linked to virtiocon0 +using :command:`minicom`, configure KDB to use the hvc0 serial port: + +.. code-block:: bash + + echo hvc0 > /sys/module/kgdboc/parameters/kgdboc + +and enable it using SysRq (:command:`Ctrl + O g`). +Review the current system status (:command:`help` to see the available KDB +commands). Continue the kernel execution using the :command:`go` command. + +Load the :file:`hello_kdb` module. +The module will simulate a bug when writing to the :file:`/proc/hello_kdb_bug` +file. To simulate a bug, use the below command: + +.. code-block:: bash + + echo 1 > /proc/hello_kdb_bug + +After running the above command, at every oops/panic the kernel stops the +execution and enters debug mode. + +Analyze the stacktrace and determine the code that generated the bug. +How can we find out from KDB the address where the module was loaded? + +In parallel, use GDB in a new window to view the code based on KDB information. + +.. hint:: + Load the symbol file. Use :command:`info line`. 
+ +When writing to :file:`/proc/hello_kdb_break`, the module will increment the +:c:data:`kdb_write_address` variable. Enter KDB and set a breakpoint for each +write access of the :c:data:`kdb_write_address` variable. +Return to the kernel to trigger a write using: + +.. code-block:: bash + + echo 1 > /proc/hello_kdb_break + +2. PS Module +------------ + +Update the kernel module created at :ref:`proc-info` in order to display +information about all the processes in the system, when inserting the kernel +module, not just about the current process. Afterwards, compare the obtained +result with the output of the :command:`ps` command. + +.. hint:: + * Processes in the system are structured in a circular list. + + * :c:macro:`for_each_...` macros (such as :c:macro:`for_each_process`) are + useful when you want to navigate the items in a list. + + * To understand how to use a feature or a macro, use |LXR|_ or Vim and + :command:`cscope` and search for usage scenarios. + +3. Memory Info +-------------- + +Create a kernel module that displays the virtual memory areas of the current +process; for each memory area it will display the start address and the end +address. + +.. hint:: + * Start from an existing kernel module. + + * Investigate the structures :c:type:`struct task_struct`, + :c:type:`struct mm_struct` and :c:type:`struct vm_area_struct`. A + memory area is indicated by a structure of type :c:type:`struct + vm_area_struct`. + + * Don't forget to include the headers where the necessary structures are + defined. + +4. Dynamic Debugging +-------------------- + +Go to the **9-dyndbg** directory and compile the :code:`dyndbg.ko` module. + +Familiarize yourself with the :code:`debugfs` file system mounted in +:file:`/debug` and analyze the contents of the file +:file:`/debug/dynamic_debug/control`. Insert the :code:`dyndbg.ko` module and +notice the new content of the :file:`dynamic_debug/control` file. + +What new entries appear in the file?
Run the following command: + +.. code-block:: bash + + grep dyndbg /debug/dynamic_debug/control + +Configure :command:`dyndbg` so that only messages marked as "Important" in +:c:func:`my_debug_func` function are displayed when the module is unloaded. +The exercise will only filter out the :c:func:`pr_debug` calls; :c:func:`printk` +calls being always displayed. + +Specify two ways to filter. + +.. hint:: + Read the `Dynamic debugging`_ section and look at the :command:`dyndbg` + options (for example, :command:`line`, :command:`format`). + +Perform the filtering and revise the :file:`dynamic_debug/control` file. What +has changed? How do you know which calls are activated? + +.. hint:: + Check the :command:`dyndbg` flags. Unload the kernel module and observe the + log messages. + +5. Dynamic Debugging During Initialization +------------------------------------------ + +As you have noticed, :c:func:`pr_debug` calls can only be activated /filtered +after module insertion. In some situations, it might be helpful to view the +messages from the initialization of the module. This can be done by using a +default (fake) parameter called :command:`dyndbg` that can be passed as an +argument to initialize the module. With this parameter you can add /delete +:command:`dyndbg` flags. + +.. hint:: + Read the last part of the `Dynamic debugging`_ section and see the available + flags (e.g.: :command:`+/- p`). + +Read the `Debug Messages section at Module Initialization Time +`_ +and insert the module so that the messages in :c:func:`my_debug_func` (called +:c:func:`dyndbg_init`) are also displayed during initialization. + +.. warning:: + In the VM from the lab, you will need to use :command:`insmod` instead of + :command:`modprobe`. + +Without unloading the module, deactivate :c:func:`pr_debug` calls. + +.. hint:: + You can delete the set flags. Unload the kernel module. 
diff --git a/Documentation/teaching/labs/list_evolution.png b/Documentation/teaching/labs/list_evolution.png new file mode 100644 index 00000000000000..aa44396dc6d60b Binary files /dev/null and b/Documentation/teaching/labs/list_evolution.png differ diff --git a/Documentation/teaching/labs/memory_mapping.rst b/Documentation/teaching/labs/memory_mapping.rst new file mode 100644 index 00000000000000..9957e29b7b2be2 --- /dev/null +++ b/Documentation/teaching/labs/memory_mapping.rst @@ -0,0 +1,503 @@ +============== +Memory mapping +============== + +Lab objectives +============== + +* Understand address space mapping mechanisms +* Learn about the most important structures related to memory management + +Keywords: + +* address space +* :c:func:`mmap` +* :c:type:`struct page` +* :c:type:`struct vm_area_struct` +* :c:type:`struct vm_struct` +* :c:type:`remap_pfn_range` +* :c:func:`SetPageReserved` +* :c:func:`ClearPageReserved` + + +Overview +======== + +In the Linux kernel it is possible to map a kernel address space to a +user address space. This eliminates the overhead of copying user space +information into the kernel space and vice versa. This can be done +through a device driver and the user space device interface +(:file:`/dev`). + +This feature can be used by implementing the :c:func:`mmap` operation +in the device driver's :c:type:`struct file_operations` and using the +:c:func:`mmap` system call in user space. + +The basic unit for virtual memory management is a page, which size is +usually 4K, but it can be up to 64K on some platforms. Whenever we +work with virtual memory we work with two types of addresses: virtual +address and physical address. All CPU access (including from kernel +space) uses virtual addresses that are translated by the MMU into +physical addresses with the help of page tables. + +A physical page of memory is identified by the Page Frame Number +(PFN). 
The PFN can be easily computed from the physical address by +dividing it with the size of the page (or by shifting the physical +address with PAGE_SHIFT bits to the right). + +.. image:: paging.png + :width: 49 % + +For efficiency reasons, the virtual address space is divided into +user space and kernel space. For the same reason, the kernel space +contains a memory mapped zone, called **lowmem**, which is contiguously +mapped in physical memory, starting from the lowest possible physical +address (usually 0). The virtual address where lowmem is mapped is +defined by :c:macro:`PAGE_OFFSET`. + +On a 32bit system, not all available memory can be mapped in lowmem and +because of that there is a separate zone in kernel space called +**highmem** which can be used to arbitrarily map physical memory. + +Memory allocated by :c:func:`kmalloc` resides in lowmem and it is +physically contiguous. Memory allocated by :c:func:`vmalloc` is not +contiguous and does not reside in lowmem (it has a dedicated zone in +highmem). + +.. image:: kernel-virtmem-map.png + :width: 49 % + +Structures used for memory mapping +================================== + +Before discussing the mechanism of memory-mapping a device, we will +present some of the basic structures related to the memory management +subsystem of the Linux kernel. + +Before discussing about the memory mapping mechanism over a device, +we will present some of the basic structures used by the Linux memory +management subsystem. +Some of the basic structures are: :c:type:`struct page`, +:c:type:`struct vm_area_struct`, :c:type:`struct mm_struct`. + +:c:type:`struct page` +--------------------- + +:c:type:`struct page` is used to embed information about all physical +pages in the system. The kernel has a :c:type:`struct page` structure +for all pages in the system. 
+ +There are many functions that interact with this structure: + +* :c:func:`virt_to_page` returns the page associated with a virtual + address +* :c:func:`pfn_to_page` returns the page associated with a page frame + number +* :c:func:`page_to_pfn` returns the page frame number associated with a + :c:type:`struct page` +* :c:func:`page_address` returns the virtual address of a + :c:type:`struct page`; this function can be called only for pages from + lowmem +* :c:func:`kmap` creates a mapping in kernel for an arbitrary physical + page (can be from highmem) and returns a virtual address that can be + used to directly reference the page + +:c:type:`struct vm_area_struct` +------------------------------- + +:c:type:`struct vm_area_struct` holds information about a contiguous +virtual memory area. The memory areas of a process can be viewed by +inspecting the *maps* attribute of the process via procfs: + +.. code-block:: shell + + root@qemux86:~# cat /proc/1/maps + #address perms offset device inode pathname + 08048000-08050000 r-xp 00000000 fe:00 761 /sbin/init.sysvinit + 08050000-08051000 r--p 00007000 fe:00 761 /sbin/init.sysvinit + 08051000-08052000 rw-p 00008000 fe:00 761 /sbin/init.sysvinit + 092e1000-09302000 rw-p 00000000 00:00 0 [heap] + 4480c000-4482e000 r-xp 00000000 fe:00 576 /lib/ld-2.25.so + 4482e000-4482f000 r--p 00021000 fe:00 576 /lib/ld-2.25.so + 4482f000-44830000 rw-p 00022000 fe:00 576 /lib/ld-2.25.so + 44832000-449a9000 r-xp 00000000 fe:00 581 /lib/libc-2.25.so + 449a9000-449ab000 r--p 00176000 fe:00 581 /lib/libc-2.25.so + 449ab000-449ac000 rw-p 00178000 fe:00 581 /lib/libc-2.25.so + 449ac000-449af000 rw-p 00000000 00:00 0 + b7761000-b7763000 rw-p 00000000 00:00 0 + b7763000-b7766000 r--p 00000000 00:00 0 [vvar] + b7766000-b7767000 r-xp 00000000 00:00 0 [vdso] + bfa15000-bfa36000 rw-p 00000000 00:00 0 [stack] + +A memory area is characterized by a start address, an end address, +a length, and permissions.
+ +A :c:type:`struct vm_area_struct` is created at each :c:func:`mmap` +call issued from user space. A driver that supports the :c:func:`mmap` +operation must complete and initialize the associated +:c:type:`struct vm_area_struct`. The most important fields of this +structure are: + +* :c:member:`vm_start`, :c:member:`vm_end` - the beginning and the end of + the memory area, respectively (these fields also appear in + :file:`/proc/<pid>/maps`); +* :c:member:`vm_file` - the pointer to the associated file structure (if any); +* :c:member:`vm_pgoff` - the offset of the area within the file; +* :c:member:`vm_flags` - a set of flags; +* :c:member:`vm_ops` - a set of working functions for this area; +* :c:member:`vm_next`, :c:member:`vm_prev` - the areas of the same process + are chained by a list structure + +:c:type:`struct mm_struct` +-------------------------- + +:c:type:`struct mm_struct` encompasses all memory areas associated +with a process. The :c:member:`mm` field of :c:type:`struct task_struct` +is a pointer to the :c:type:`struct mm_struct` of the current process. + + +Device driver memory mapping +============================ + +Memory mapping is one of the most interesting features of a Unix +system. From a driver's point of view, the memory-mapping facility +allows user space to directly access the memory of a device. + +To assign a :c:func:`mmap` operation to a driver, the :c:member:`mmap` +field of the device driver's :c:type:`struct file_operations` must be +implemented. If that is the case, the user space process can then use +the :c:func:`mmap` system call on a file descriptor associated with +the device. + +The mmap system call takes the following parameters: + +.. code-block:: c + + void *mmap(caddr_t addr, size_t len, int prot, + int flags, int fd, off_t offset); + +To map memory between a device and user space, the user process must +open the device and issue the :c:func:`mmap` system call with the resulting +file descriptor. 
+ +The device driver :c:func:`mmap` operation has the following signature: + +.. code-block:: c + + int (*mmap)(struct file *filp, struct vm_area_struct *vma); + +The *filp* parameter is a pointer to a :c:type:`struct file` created when +the device is opened from user space. The *vma* parameter is used to +indicate the virtual address space where the memory should be mapped +by the device. A driver should allocate memory (using +:c:func:`kmalloc`, :c:func:`vmalloc`, :c:func:`alloc_pages`) and then +map it to the user address space as indicated by the *vma* parameter +using helper functions such as :c:func:`remap_pfn_range`. + +:c:func:`remap_pfn_range` will map a contiguous physical address space +into the virtual space represented by :c:type:`vm_area_struct`: + +.. code-block:: c + + int remap_pfn_range (struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t prot); + +:c:func:`remap_pfn_range` expects the following parameters: + +* *vma* - the virtual memory space in which mapping is made; +* *addr* - the virtual address space from where remapping begins; page + tables for the virtual address space between addr and addr + size + will be formed as needed +* *pfn* - the page frame number to which the virtual address should be + mapped +* *size* - the size (in bytes) of the memory to be mapped +* *prot* - protection flags for this mapping + +Here is an example of using this function that contiguously maps the +physical memory starting at page frame number *pfn* (memory that was +previously allocated) to the *vma->vm_start* virtual address: + +.. code-block:: c + + struct vm_area_struct *vma; + unsigned long len = vma->vm_end - vma->vm_start; + int ret; + + ret = remap_pfn_range(vma, vma->vm_start, pfn, len, vma->vm_page_prot); + if (ret < 0) { + pr_err("could not map the address area\n"); + return -EIO; + } + +To obtain the page frame number of the physical memory we must +consider how the memory allocation was performed. 
For each of +:c:func:`kmalloc`, :c:func:`vmalloc`, :c:func:`alloc_pages`, we must +use a different approach. For :c:func:`kmalloc` we can use something +like: + +.. code-block:: c + + static char *kmalloc_area; + + unsigned long pfn = virt_to_phys((void *)kmalloc_area)>>PAGE_SHIFT; + +while for :c:func:`vmalloc`: + +.. code-block:: c + + static char *vmalloc_area; + + unsigned long pfn = vmalloc_to_pfn(vmalloc_area); + +and finally for :c:func:`alloc_pages`: + +.. code-block:: c + + struct page *page; + + unsigned long pfn = page_to_pfn(page); + +.. attention:: Note that memory allocated with :c:func:`vmalloc` is not + physically contiguous so if we want to map a range allocated + with :c:func:`vmalloc`, we have to map each page individually + and compute the physical address for each page. + +Since the pages are mapped to user space, they might be swapped +out. To avoid this we must set the PG_reserved bit on the page. +Enabling is done using :c:func:`SetPageReserved` while resetting it +(which must be done before freeing the memory) is done with +:c:func:`ClearPageReserved`: + +.. code-block:: c + + char *alloc_mmap_pages(int npages) + { + int i; + char *mem = kmalloc(PAGE_SIZE * npages, GFP_KERNEL); + + if (!mem) + return mem; + + for(i = 0; i < npages * PAGE_SIZE; i += PAGE_SIZE) + SetPageReserved(virt_to_page(((unsigned long)mem) + i)); + + return mem; + } + + void free_mmap_pages(void *mem, int npages) + { + int i; + + for(i = 0; i < npages * PAGE_SIZE; i += PAGE_SIZE) + ClearPageReserved(virt_to_page(((unsigned long)mem) + i)); + + kfree(mem); + } + + +Further reading +=============== + +* `Linux Device Drivers 3rd Edition - Chapter 15. Memory Mapping and DMA `_ +* `Linux Device Driver mmap Skeleton `_ +* `Driver porting: supporting mmap () `_ +* `Device Drivers Concluded `_ +* `mmap `_ + +Exercises +========= + +.. include:: exercises-summary.hrst +.. |LAB_NAME| replace:: memory_mapping + +1. 
Mapping contiguous physical memory to userspace +-------------------------------------------------- + +Implement a device driver that maps contiguous physical memory +(e.g. obtained via :c:func:`kmalloc`) to userspace. + +Review the `Device driver memory mapping`_ section, generate the +skeleton for the task named **kmmap** and fill in the areas marked +with **TODO 1**. + +Start by allocating a memory area of NPAGES+2 pages using :c:func:`kmalloc` +in the module init function and find the first address in the area that is +aligned to a page boundary. + +.. hint:: The size of a page is *PAGE_SIZE*. + + Store the allocated area in *kmalloc_ptr* and the page + aligned address in *kmalloc_area*. + + Use :c:func:`PAGE_ALIGN` to determine *kmalloc_area*. + +Enable the PG_reserved bit of each page with +:c:func:`SetPageReserved`. Clear the bit with +:c:func:`ClearPageReserved` before freeing the memory. + +.. hint:: Use :c:func:`virt_to_page` to translate virtual pages into + physical pages, as required by :c:func:`SetPageReserved` + and :c:func:`ClearPageReserved`. + +For verification purposes (using the test below), fill in the first 4 +bytes of each page with the following values: 0xaa, 0xbb, 0xcc, 0xdd. + +Implement the :c:func:`mmap` driver function. + +.. hint:: For mapping, use :c:func:`remap_pfn_range`. The third + argument for :c:func:`remap_pfn_range` is a page frame number (PFN). + + To convert from virtual kernel address to physical address, + use :c:func:`virt_to_phys`. + + To convert a physical address to its PFN, shift the address + by PAGE_SHIFT bits to the right. + +For testing, load the kernel module and run: + +.. code-block:: shell + + root@qemux86:~# skels/memory_mapping/test/mmap-test 1 + +If everything goes well, the test will show "matched" messages. + +2. Mapping non-contiguous physical memory to userspace +------------------------------------------------------ + +Implement a device driver that maps non-contiguous physical memory +(e.g. 
obtained via :c:func:`vmalloc`) to userspace. + +Review the `Device driver memory mapping`_ section, generate the +skeleton for the task named **vmmap** and fill in the areas marked +with **TODO 1**. + +Allocate a memory area of NPAGES pages with :c:func:`vmalloc`. + +.. hint:: The size of a page is *PAGE_SIZE*. + Store the allocated area in *vmalloc_area*. + Memory allocated by :c:func:`vmalloc` is page aligned. + +Enable the PG_reserved bit of each page with +:c:func:`SetPageReserved`. Clear the bit with +:c:func:`ClearPageReserved` before freeing the memory. + +.. hint:: Use :c:func:`vmalloc_to_page` to translate virtual pages + into physical pages used by the functions + :c:func:`SetPageReserved` and :c:func:`ClearPageReserved`. + +For verification purposes (using the test below), fill in the first 4 +bytes of each page with the following values: 0xaa, 0xbb, 0xcc, 0xdd. + +Implement the mmap driver function. + +.. hint:: To convert from virtual vmalloc address to physical address, + use :c:func:`vmalloc_to_pfn` which returns a PFN directly. + +.. attention:: vmalloc pages are not physically contiguous so you + need to call :c:func:`remap_pfn_range` for each page. + + Loop through all virtual pages and for each: + + * determine the physical address + * map it with :c:func:`remap_pfn_range` + + Make sure that you determine the physical address + each time and that you use a range of one page for mapping. + +For testing, load the kernel module and run: + +.. code-block:: shell + + root@qemux86:~# skels/memory_mapping/test/mmap-test 1 + +If everything goes well, the test will show "matched" messages. + +3. Read / write operations in mapped memory +------------------------------------------- + +Modify one of the previous modules to allow read / write operations on +your device. This is a didactic exercise to see that the same space +can also be used with the :c:func:`mmap` call and with :c:func:`read` +and :c:func:`write` calls. 
+ +Fill in areas marked with **TODO 2**. + +.. note:: The offset parameter sent to the read / write operation can + be ignored as all reads / writes from the test program will + be done with 0 offsets. + +For testing, load the kernel module and run: + +.. code-block:: shell + + root@qemux86:~# skels/memory_mapping/test/mmap-test 2 + + +4. Display memory mapped in procfs +---------------------------------- + +Using one of the previous modules, create a procfs file in which you +display the total memory mapped by the calling process. + +Fill in the areas marked with **TODO 3**. + +Create a new entry in procfs (:c:macro:`PROC_ENTRY_NAME`, defined in +:file:`mmap-test.h`) that will show the total memory mapped by the process +that called the :c:func:`read` on that file. + +.. hint:: Use :c:func:`proc_create`. For the mode parameter, use 0, + and for the parent parameter use NULL. Use + :c:data:`struct file_operations my_proc_file_ops` for operations. + +In the module exit function, delete the :c:macro:`PROC_ENTRY_NAME` entry +using :c:func:`remove_proc_entry`. + +.. note:: A (complex) use and description of the :c:type:`struct + seq_file` interface can be found here in this `example + `_ . + + For this exercise, just a simple use of the interface + described `here `_ is + sufficient. Check the "extra-simple" API described there. + +In the :c:func:`my_seq_show` function you will need to: + +* Obtain the :c:type:`struct mm_struct` structure of the current process + using the :c:func:`get_task_mm` function. + + .. hint:: The current process is available via the *current* variable + of type :c:type:`struct task_struct*`. + +* Iterate through the entire :c:type:`struct vm_area_struct` list + associated with the process. + + .. hint:: Use the variable :c:data:`vma_iterator` and start from + :c:data:`mm->mmap`. Use the :c:member:`vm_next` field of + the :c:type:`struct vm_area_struct` to navigate through + the list of memory areas. Stop when you reach :c:macro:`NULL`. 
+ +* Use *vm_start* and *vm_end* for each area to compute the total size. + +* Use :c:func:`pr_info("%lx %lx\n", ...)` to print *vm_start* and *vm_end* for + each area. + +* To release :c:type:`struct mm_struct`, decrement the reference + counter of the structure using :c:func:`mmput`. + +* Use :c:func:`seq_printf` to write to the file. Show only the total count, + no other messages. Do not even print a newline (\n). + +In :c:func:`my_seq_open` register the display function +(:c:func:`my_seq_show`) using :c:func:`single_open`. + +.. note:: :c:func:`single_open` can use :c:macro:`NULL` as its third argument. + +For testing, load the kernel module and run: + +.. code-block:: shell + + root@qemux86:~# skels/memory_mapping/test/mmap-test 3 + +.. note:: The test waits for a while (it has an internal sleep + instruction). As long as the test waits, use the + :command:`pmap` command in another console to see the + mappings of the test and compare those to the test results. diff --git a/Documentation/teaching/labs/minfs.png b/Documentation/teaching/labs/minfs.png new file mode 100644 index 00000000000000..affd823050fb00 Binary files /dev/null and b/Documentation/teaching/labs/minfs.png differ diff --git a/Documentation/teaching/labs/minfs_arch.png b/Documentation/teaching/labs/minfs_arch.png new file mode 100644 index 00000000000000..6778e6638fd60d Binary files /dev/null and b/Documentation/teaching/labs/minfs_arch.png differ diff --git a/Documentation/teaching/labs/networking.rst b/Documentation/teaching/labs/networking.rst new file mode 100644 index 00000000000000..bee07e835e106b --- /dev/null +++ b/Documentation/teaching/labs/networking.rst @@ -0,0 +1,1259 @@ +============================ +Networking +============================ + +Lab objectives +============== + + * Understanding the Linux kernel networking architecture + * Acquiring practical IP packet management skills using a packet filter or + firewall + * Familiarize yourself with how to use sockets at the Linux 
kernel level + +Overview +======== + +The development of the Internet has led to an exponential increase in network +applications and, as a consequence, to increasing the speed and productivity +requirements of an operating system's networking subsystem. The networking +subsystem is not an essential component of an operating system kernel (the Linux +kernel can be compiled without networking support). It is, however, quite +unlikely for a computing system (or even an embedded device) to have a +non-networked operating system due to the need for connectivity. Modern operating +systems use the `TCP/IP stack +`_. Their kernel +implements protocols up to the transport layer, while application layer protocols +are typically implemented in user space (HTTP, FTP, SSH, etc.). + +Networking in user space +------------------------ + +In user space the abstraction of network communication is the socket. The +socket abstracts a communication channel and is the kernel-based TCP/IP stack +interaction interface. An IP socket is associated with an IP address, the +transport layer protocol used (TCP, UDP, etc.) and a port. Common function calls +that use sockets are: creation (``socket``), initialization +(``bind``), connecting (``connect``), waiting for a connection +(``listen``, ``accept``), closing a socket (``close``). + +Network communication is accomplished via ``read``/``write`` or ``recv``/``send`` calls +for TCP sockets and ``recvfrom``/``sendto`` for UDP sockets. Transmission and +reception operations are transparent to the application, leaving encapsulation +and transmission over network at the kernel's discretion. However, it is +possible to implement the TCP/IP stack in user space using raw sockets (the +``PF_PACKET`` option when creating a socket), or to implement an application +layer protocol in kernel (`TUX web server +`_). + +For more details about user space programming using sockets, see `Beej's Guide to +Network Programming Using Internet +Sockets `_. 
+ +Linux networking +================ + +The Linux kernel provides three basic structures for working with network +packets: :c:type:`struct socket`, :c:type:`struct sock` and :c:type:`struct +sk_buff`. + +The first two are abstractions of a socket: + + * :c:type:`struct socket` is an abstraction very close to user space, ie `BSD + sockets `_ used to program + network applications; + * :c:type:`struct sock` or *INET socket* in Linux terminology is the network + representation of a socket. + +The two structures are related: the :c:type:`struct socket` contains an INET +socket field, and the :c:type:`struct sock` has a BSD socket that holds it. + +The :c:type:`struct sk_buff` structure is the representation of a network packet +and its status. The structure is created when a kernel packet is received, +either from the user space or from the network interface. + +The :c:type:`struct socket` structure +------------------------------------- + +The :c:type:`struct socket` structure is the kernel representation of a BSD +socket, the operations that can be executed on it are similar to those offered +by the kernel (through system calls). Common operations with sockets +(creation, initialization/bind, closing, etc.) result in specific system +calls; they work with the :c:type:`struct socket` structure. + +The :c:type:`struct socket` operations are described in :file:`net/socket.c` and +are independent of the protocol type. The :c:type:`struct socket` structure is thus +a generic interface over particular network operations implementations. +Typically, the names of these operations begin with the ``sock_`` prefix. + +.. 
_SocketStructOps: + +Operations on the socket structure +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Socket operations are: + +Creation +"""""""" + +Creation is similar to calling the :c:func:`socket` function in user space, but the +:c:type:`struct socket` created will be stored in the ``res`` parameter: + + * ``int sock_create(int family, int type, int protocol, struct socket **res)`` + creates a socket after the :c:func:`socket` system call; + * ``int sock_create_kern(struct net *net, int family, int type, int protocol, + struct socket **res)`` creates a kernel socket; + * ``int sock_create_lite(int family, int type, int protocol, struct socket **res)`` + creates a kernel socket without parameter sanity checks. + +The parameters of these calls are as follows: + + * ``net``, where it is present, used as reference to the network namespace used; + we will usually initialize it with ``init_net``; + * ``family`` represents the family of protocols used in the transfer of + information; they usually begin with the ``PF_`` (Protocol Family) string; + the constants representing the family of protocols used are found in + :file:`linux/socket.h`, of which the most commonly used is ``PF_INET``, for + TCP/IP protocols; + * ``type`` is the type of socket; the constants used for this parameter are + found in :file:`linux/net.h`, of which the most used are ``SOCK_STREAM`` for + a connection based source-to-destination communication and ``SOCK_DGRAM`` + for connectionless communication; + * ``protocol`` represents the protocol used and is closely related to the + ``type`` parameter; the constants used for this parameter are found in + :file:`linux/in.h`, of which the most used are ``IPPROTO_TCP`` for TCP and + ``IPPROTO_UDP`` for UDP. + +To create a TCP socket in kernel space, you must call: + +.. 
code-block:: c + + struct socket *sock; + int err; + + err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + /* handle error */ + } + +and for creating UDP sockets: + +.. code-block:: c + + struct socket *sock; + int err; + + err = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (err < 0) { + /* handle error */ + } + +A usage sample is part of the :c:func:`sys_socket` system call handler: + +.. code-block:: c + + SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) + { + int retval; + struct socket *sock; + int flags; + + /* Check the SOCK_* constants for consistency. */ + BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); + BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); + BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); + + flags = type & ~SOCK_TYPE_MASK; + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + type &= SOCK_TYPE_MASK; + + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; + + retval = sock_create(family, type, protocol, &sock); + if (retval < 0) + goto out; + + return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); + } + +Closing +""""""" + +Close connection (for sockets using connection) and release associated +resources: + + * ``void sock_release(struct socket *sock)`` calls the ``release`` function in + the ``ops`` field of the socket structure: + +.. code-block:: c + + void sock_release(struct socket *sock) + { + if (sock->ops) { + struct module *owner = sock->ops->owner; + + sock->ops->release(sock); + sock->ops = NULL; + module_put(owner); + } + //... 
+ } + +Sending/receiving messages +"""""""""""""""""""""""""" + +The messages are sent/received using the following functions: + + * ``int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags);`` + * ``int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags);`` + * ``int sock_sendmsg(struct socket *sock, struct msghdr *msg);`` + * ``int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size);`` + +The message sending/receiving functions will then call the ``sendmsg``/ +``recvmsg`` function in the ``ops`` field of the socket. Functions +containing ``kernel_`` as a prefix are used when the socket is used in the +kernel. + +The parameters are: + + * ``msg``, a :c:type:`struct msghdr` structure, containing the message to be + sent/received. Among the important components of this structure are ``msg_name`` + and ``msg_namelen``, which, for UDP sockets, must be filled in with the address + to which the message is sent (:c:type:`struct sockaddr_in`); + * ``vec``, a :c:type:`struct kvec` structure, containing a pointer to the buffer + containing its data and size; as can be seen, it has a similar structure to the + :c:type:`struct iovec` structure (the :c:type:`struct iovec` structure + corresponds to the user space data, and the :c:type:`struct kvec` structure + corresponds to kernel space data). + +A usage example can be seen in the :c:func:`sys_sendto` system call handler: + +.. 
code-block:: c + + SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, + unsigned int, flags, struct sockaddr __user *, addr, + int, addr_len) + { + struct socket *sock; + struct sockaddr_storage address; + int err; + struct msghdr msg; + struct iovec iov; + int fput_needed; + + err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter); + if (unlikely(err)) + return err; + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + goto out; + + msg.msg_name = NULL; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_namelen = 0; + if (addr) { + err = move_addr_to_kernel(addr, addr_len, &address); + if (err < 0) + goto out_put; + msg.msg_name = (struct sockaddr *)&address; + msg.msg_namelen = addr_len; + } + if (sock->file->f_flags & O_NONBLOCK) + flags |= MSG_DONTWAIT; + msg.msg_flags = flags; + err = sock_sendmsg(sock, &msg); + + out_put: + fput_light(sock->file, fput_needed); + out: + return err; + } + +The :c:type:`struct socket` fields +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c + + /** + * struct socket - general BSD socket + * @state: socket state (%SS_CONNECTED, etc) + * @type: socket type (%SOCK_STREAM, etc) + * @flags: socket flags (%SOCK_NOSPACE, etc) + * @ops: protocol specific socket operations + * @file: File back pointer for gc + * @sk: internal networking protocol agnostic socket representation + * @wq: wait queue for several uses + */ + struct socket { + socket_state state; + + short type; + + unsigned long flags; + + struct socket_wq __rcu *wq; + + struct file *file; + struct sock *sk; + const struct proto_ops *ops; + }; + +The noteworthy fields are: + + * ``ops`` - the structure that stores pointers to protocol-specific functions; + * ``sk`` - The ``INET socket`` associated with it. 
+ +The :c:type:`struct proto_ops` structure +"""""""""""""""""""""""""""""""""""""""" + +The :c:type:`struct proto_ops` structure contains the implementations of the specific +operations implemented (TCP, UDP, etc.); these functions will be called from +generic functions through :c:type:`struct socket` (:c:func:`sock_release`, +:c:func:`sock_sendmsg`, etc.) + +The :c:type:`struct proto_ops` structure therefore contains a number of function +pointers for specific protocol implementations: + +.. code-block:: c + + struct proto_ops { + int family; + struct module *owner; + int (*release) (struct socket *sock); + int (*bind) (struct socket *sock, + struct sockaddr *myaddr, + int sockaddr_len); + int (*connect) (struct socket *sock, + struct sockaddr *vaddr, + int sockaddr_len, int flags); + int (*socketpair)(struct socket *sock1, + struct socket *sock2); + int (*accept) (struct socket *sock, + struct socket *newsock, int flags, bool kern); + int (*getname) (struct socket *sock, + struct sockaddr *addr, + int peer); + //... + } + +The initialization of the ``ops`` field from :c:type:`struct socket` is done in +the :c:func:`__sock_create` function, by calling the :c:func:`create` function, +specific to each protocol; an equivalent call is the implementation of the +:c:func:`__sock_create` function: + +.. code-block:: c + + //... + err = pf->create(net, sock, protocol, kern); + if (err < 0) + goto out_module_put; + //... + +This will instantiate the function pointers with calls specific to the protocol +type associated with the socket. The :c:func:`sock_register` and +:c:func:`sock_unregister` calls are used to fill the ``net_families`` vector. + +For the rest of the socket operations (other than creating, closing, and +sending/receiving a message as described above in the `Operations on the socket +structure`_ section), the functions sent via pointers in this structure will be +called. 
For example, for ``bind``, which associates a socket with an address and port on +the local machine, we will have the following code sequence: + +.. code-block:: c + + #define MY_PORT 60000 + + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = htons (MY_PORT), + .sin_addr = { htonl (INADDR_LOOPBACK) } + }; + + //... + err = sock->ops->bind (sock, (struct sockaddr *) &addr, sizeof(addr)); + if (err < 0) { + /* handle error */ + } + //... + +As you can see, for transmitting the address and port information that +will be associated with the socket, a :c:type:`struct sockaddr_in` is filled. + +The :c:type:`struct sock` structure +----------------------------------- + +The :c:type:`struct sock` describes an ``INET`` socket. Such a structure is +associated with a user space socket and implicitly with a :c:type:`struct +socket` structure. The structure is used to store information about the status +of a connection. The structure's fields and associated operations usually begin +with the ``sk_`` string. Some fields are listed below: + +.. code-block:: c + + struct sock { + //... + unsigned int sk_padding : 1, + sk_no_check_tx : 1, + sk_no_check_rx : 1, + sk_userlocks : 4, + sk_protocol : 8, + sk_type : 16; + //... + struct socket *sk_socket; + //... + struct sk_buff *sk_send_head; + //... + void (*sk_state_change)(struct sock *sk); + void (*sk_data_ready)(struct sock *sk); + void (*sk_write_space)(struct sock *sk); + void (*sk_error_report)(struct sock *sk); + int (*sk_backlog_rcv)(struct sock *sk, + struct sk_buff *skb); + void (*sk_destruct)(struct sock *sk); + }; + +\ + + * ``sk_protocol`` is the type of protocol used by the socket; + * ``sk_type`` is the socket type (``SOCK_STREAM``, ``SOCK_DGRAM``, etc.); + * ``sk_socket`` is the BSD socket that holds it; + * ``sk_send_head`` is the list of :c:type:`struct sk_buff` structures for + transmission; + * the function pointers at the end are callbacks for different situations. 
+ +Initializing the :c:type:`struct sock` and attaching it to a BSD socket is done +using the callback created from ``net_families`` (called +:c:func:`__sock_create`). Here's how to initialize the :c:type:`struct sock` +structure for the IP protocol, in the :c:func:`inet_create` function: + +.. code-block:: c + + /* + * Create an inet socket. + */ + + static int inet_create(struct net *net, struct socket *sock, int protocol, + int kern) + { + + struct sock *sk; + + //... + err = -ENOBUFS; + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); + if (!sk) + goto out; + + err = 0; + if (INET_PROTOSW_REUSE & answer_flags) + sk->sk_reuse = SK_CAN_REUSE; + + + //... + sock_init_data(sock, sk); + + sk->sk_destruct = inet_sock_destruct; + sk->sk_protocol = protocol; + sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + //... + } + +.. _StructSKBuff: + +The :c:type:`struct sk_buff` structure +-------------------------------------- + +The :c:type:`struct sk_buff` (socket buffer) describes a network packet. The +structure fields contain information about both the header and packet contents, +the protocols used, the network device used, and pointers to the other +:c:type:`struct sk_buff`. A summary description of the content of the structure +is presented below: + +.. code-block:: c + + struct sk_buff { + union { + struct { + /* These two members must be first. */ + struct sk_buff *next; + struct sk_buff *prev; + + union { + struct net_device *dev; + /* Some protocols might use this space to store information, + * while device pointer would be NULL. + * UDP receive path is one user. + */ + unsigned long dev_scratch; + }; + }; + + struct rb_node rbnode; /* used in netem & tcp stack */ + }; + struct sock *sk; + + union { + ktime_t tstamp; + u64 skb_mstamp; + }; + + /* + * This is the control buffer. It is free to use for every + * layer. Please put your private variables there. If you + * want to keep them across layers you have to do a skb_clone() + * first. 
This is owned by whoever has the skb queued ATM. + */ + char cb[48] __aligned(8); + + unsigned long _skb_refdst; + void (*destructor)(struct sk_buff *skb); + union { + struct { + unsigned long _skb_refdst; + void (*destructor)(struct sk_buff *skb); + }; + struct list_head tcp_tsorted_anchor; + }; + /* ... */ + + unsigned int len, + data_len; + __u16 mac_len, + hdr_len; + + /* ... */ + + __be16 protocol; + __u16 transport_header; + __u16 network_header; + __u16 mac_header; + + /* private: */ + __u32 headers_end[0]; + /* public: */ + + /* These elements must be at the end, see alloc_skb() for details. */ + sk_buff_data_t tail; + sk_buff_data_t end; + unsigned char *head, + *data; + unsigned int truesize; + refcount_t users; + }; + +where: + + * ``next`` and ``prev`` are pointers to the next, and previous element in the + buffer list; + * ``dev`` is the device which sends or receives the buffer; + * ``sk`` is the socket associated with the buffer; + * ``destructor`` is the callback that deallocates the buffer; + * ``transport_header``, ``network_header``, and ``mac_header`` are offsets + between the beginning of the packet and the beginning of the various headers + in the packets. They are internally maintained by the various processing + layers through which the packet passes. To get pointers to the headers, use + one of the following functions: :c:func:`tcp_hdr`, :c:func:`udp_hdr`, + :c:func:`ip_hdr`, etc. In principle, each protocol provides a function to + get a reference to the header of that protocol within a received packet. + Keep in mind that the ``network_header`` field is not set until the packet + reaches the network layer and the ``transport_header`` field is not set + until the packet reaches the transport layer. + +The structure of an `IP header `_ +(:c:type:`struct iphdr`) has the following fields: + +.. 
code-block:: c + + struct iphdr { + #if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 ihl:4, + version:4; + #elif defined (__BIG_ENDIAN_BITFIELD) + __u8 version:4, + ihl:4; + #else + #error "Please fix <asm/byteorder.h>" + #endif + __u8 tos; + __be16 tot_len; + __be16 id; + __be16 frag_off; + __u8 ttl; + __u8 protocol; + __sum16 check; + __be32 saddr; + __be32 daddr; + /*The options start here. */ + }; + +where: + + * ``protocol`` is the transport layer protocol used; + * ``saddr`` is the source IP address; + * ``daddr`` is the destination IP address. + +The structure of a `TCP header +`_ +(:c:type:`struct tcphdr`) has the following fields: + +.. code-block:: c + + struct tcphdr { + __be16 source; + __be16 dest; + __be32 seq; + __be32 ack_seq; + #if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 res1:4, + doff:4, + fin:1, + syn:1, + rst:1, + psh:1, + ack:1, + urg:1, + ece:1, + cwr:1; + #elif defined(__BIG_ENDIAN_BITFIELD) + __u16 doff:4, + res1:4, + cwr:1, + ece:1, + urg:1, + ack:1, + psh:1, + rst:1, + syn:1, + fin:1; + #else + #error "Adjust your defines" + #endif + __be16 window; + __sum16 check; + __be16 urg_ptr; + }; + +where: + + * ``source`` is the source port; + * ``dest`` is the destination port; + * ``syn``, ``ack``, ``fin`` are the TCP flags used; for a more detailed view, + see this `diagram + `_. + +The structure of a `UDP header +`_ +(:c:type:`struct udphdr`) has the following fields: + +.. code-block:: c + + struct udphdr { + __be16 source; + __be16 dest; + __be16 len; + __sum16 check; + }; + +where: + + * ``source`` is the source port; + * ``dest`` is the destination port. + +An example of accessing the information present in the headers of a network +packet is as follows: + +.. 
code-block:: c + + struct sk_buff *skb; + + struct iphdr *iph = ip_hdr(skb); /* IP header */ + /* iph->saddr - source IP address */ + /* iph->daddr - destination IP address */ + if (iph->protocol == IPPROTO_TCP) { /* TCP protocol */ + struct tcphdr *tcph = tcp_hdr(skb); /* TCP header */ + /* tcph->source - source TCP port */ + /* tcph->dest - destination TCP port */ + } else if (iph->protocol == IPPROTO_UDP) { /* UDP protocol */ + struct udphdr *udph = udp_hdr(skb); /* UDP header */ + /* udph->source - source UDP port */ + /* udph->dest - destination UDP port */ + } + +.. _Conversions: + +Conversions +=========== + +In different systems, there are several ways of ordering bytes in a word +(`Endianness `_), including: `Big +Endian `_ (the most +significant byte first) and `Little +Endian `_ (the least +significant byte first). Since a network interconnects systems with different +platforms, the Internet has imposed a standard sequence for the storage of +numerical data, called `network byte-order +`_. In +contrast, the byte sequence for the representation of numerical data on the host +computer is called host byte-order. Data received/sent from/to the network is in +the network byte-order format and should be converted between this format and +the host byte-order. + +For converting we use the following macros: + + * ``u16 htons(u16 x)`` converts a 16 bit integer from host byte-order to + network byte-order (host to network short); + * ``u32 htonl(u32 x)`` converts a 32 bit integer from host byte-order to + network byte-order (host to network long); + * ``u16 ntohs(u16 x)`` converts a 16 bit integer from network byte-order to + host byte-order (network to host short); + * ``u32 ntohl(u32 x)`` converts a 32 bit integer from network byte-order to + host byte-order (network to host long). + +.. _netfilter: + +netfilter +========= + +Netfilter is the name of the kernel interface for capturing network packets for +modifying/analyzing them (for filtering, NAT, etc.). 
`The netfilter +`_ interface is used in user space by `iptables +`_. + +In the Linux kernel, packet capture using netfilter is done by attaching hooks. +Hooks can be specified in different locations in the path followed by a kernel +network packet, as needed. An organization chart with the route followed by a +packet and the possible areas for a hook can be found `here +`_. + +The header included when using netfilter is :file:`linux/netfilter.h`. + +A hook is defined through the :c:type:`struct nf_hook_ops` structure: + +.. code-block:: c + + struct nf_hook_ops { + /* User fills in from here down. */ + nf_hookfn *hook; + struct net_device *dev; + void *priv; + u_int8_t pf; + unsigned int hooknum; + /* Hooks are ordered in ascending priority. */ + int priority; + }; + +where: + + * ``pf`` is the protocol family of the packets (``PF_INET``, etc.); + * ``priority`` is the priority; priorities are defined in + :file:`uapi/linux/netfilter_ipv4.h` as follows: + +.. code-block:: c + + enum nf_ip_hook_priorities { + NF_IP_PRI_FIRST = INT_MIN, + NF_IP_PRI_CONNTRACK_DEFRAG = -400, + NF_IP_PRI_RAW = -300, + NF_IP_PRI_SELINUX_FIRST = -225, + NF_IP_PRI_CONNTRACK = -200, + NF_IP_PRI_MANGLE = -150, + NF_IP_PRI_NAT_DST = -100, + NF_IP_PRI_FILTER = 0, + NF_IP_PRI_SECURITY = 50, + NF_IP_PRI_NAT_SRC = 100, + NF_IP_PRI_SELINUX_LAST = 225, + NF_IP_PRI_CONNTRACK_HELPER = 300, + NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX, + NF_IP_PRI_LAST = INT_MAX, + }; + +\ + + + * ``dev`` is the device (network interface) on which the capture is + intended; + + + * ``hooknum`` is the type of hook used. When a packet is captured, the + processing mode is defined by the ``hooknum`` and ``hook`` fields. For IP, + hook types are defined in :file:`linux/netfilter.h`: + +.. 
code-block:: c + + enum nf_inet_hooks { + NF_INET_PRE_ROUTING, + NF_INET_LOCAL_IN, + NF_INET_FORWARD, + NF_INET_LOCAL_OUT, + NF_INET_POST_ROUTING, + NF_INET_NUMHOOKS + }; + +\ + + * ``hook`` is the handler called when capturing a network packet (packet sent + as a :c:type:`struct sk_buff` structure). The ``priv`` field is private information + handed to the handler. The capture handler prototype is defined by the + :c:type:`nf_hookfn` type: + +.. code-block:: c + + struct nf_hook_state { + unsigned int hook; + u_int8_t pf; + struct net_device *in; + struct net_device *out; + struct sock *sk; + struct net *net; + int (*okfn)(struct net *, struct sock *, struct sk_buff *); + }; + + typedef unsigned int nf_hookfn(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state); + +For the :c:func:`nf_hookfn` capture function, the ``priv`` parameter is the private +information with which the :c:type:`struct nf_hook_ops` was initialized. ``skb`` +is the pointer to the captured network packet. Based on ``skb`` information, +packet filtering decisions are made. The function's ``state`` parameter is the +status information related to the packet capture, including the input interface, +the output interface, the protocol family and the hook number. The protocol family and +hook number are useful for allowing the same function to be called by several hooks. + +A capture handler can return one of the constants ``NF_*``: + +.. code-block:: c + + /* Responses from hook functions. */ + #define NF_DROP 0 + #define NF_ACCEPT 1 + #define NF_STOLEN 2 + #define NF_QUEUE 3 + #define NF_REPEAT 4 + #define NF_STOP 5 + #define NF_MAX_VERDICT NF_STOP + +``NF_DROP`` is used to filter (ignore) a packet, and ``NF_ACCEPT`` is used to +accept a packet and forward it. + +Registering/unregistering a hook is done using the functions defined in +:file:`linux/netfilter.h`: + +.. code-block:: c + + /* Function to register/unregister hook points. 
*/ + int nf_register_net_hook(struct net *net, const struct nf_hook_ops *ops); + void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *ops); + int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, + unsigned int n); + void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, + unsigned int n); + + +.. attention:: + + There are some restrictions related to the use of header extraction functions + from a :c:type:`struct sk_buff` structure set as a parameter in a netfilter + hook. While the IP header can be obtained each time using :c:func:`ip_hdr`, + the TCP and UDP headers can be obtained with :c:func:`tcp_hdr` and + :c:func:`udp_hdr` only for packets that come from inside the system rather + than the ones that are received from outside the system. In the latter case, + you must manually calculate the header offset in the packet: + + .. code-block:: c + + // For TCP packets (iph->protocol == IPPROTO_TCP) + tcph = (struct tcphdr*)((__u32*)iph + iph->ihl); + // For UDP packets (iph->protocol == IPPROTO_UDP) + udph = (struct udphdr*)((__u32*)iph + iph->ihl); + + This code works in all filtering situations, so it's recommended to use it + instead of header access functions. + +A usage example for a netfilter hook is shown below: + +.. code-block:: c + + #include + #include + #include + #include + #include + #include + #include + + static unsigned int my_nf_hookfn(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) + { + /* process packet */ + //... 
+ + return NF_ACCEPT; + } + + static struct nf_hook_ops my_nfho = { + .hook = my_nf_hookfn, + .hooknum = NF_INET_LOCAL_OUT, + .pf = PF_INET, + .priority = NF_IP_PRI_FIRST + }; + + int __init my_hook_init(void) + { + return nf_register_net_hook(&init_net, &my_nfho); + } + + void __exit my_hook_exit(void) + { + nf_unregister_net_hook(&init_net, &my_nfho); + } + + module_init(my_hook_init); + module_exit(my_hook_exit); + +netcat +====== + +When developing applications that include networking code, one of the most +used tools is netcat, also nicknamed the "Swiss-army knife for TCP/IP". It allows: + + * Initiating TCP connections; + * Waiting for a TCP connection; + * Sending and receiving UDP packets; + * Displaying traffic in hexdump format; + * Running a program after establishing a connection (e.g., a shell); + * Setting special options in sent packets. + +Initiating TCP connections: + +.. code-block:: console + + nc hostname port + +Listening to a TCP port: + +.. code-block:: console + + nc -l -p port + +Sending and receiving UDP packets is done by adding the ``-u`` command line option. + +.. note:: + + The command is :command:`nc`; often :command:`netcat` is an alias for this + command. There are other implementations of the netcat command, some of which + have slightly different parameters than the classic implementation. Run + :command:`man nc` or :command:`nc -h` to check how to use it. + +For more information on netcat, check the following `tutorial +`_. + +Further reading +=============== + +#. Understanding Linux Network Internals +#. `Linux IP networking`_ +#. `The TUX Web Server`_ +#. `Beej's Guide to Network Programming Using Internet Sockets`_ +#. `Kernel Korner - Network Programming in the Kernel`_ +#. `Hacking the Linux Kernel Network Stack`_ +#. `The netfilter.org project`_ +#. `A Deep Dive Into Iptables and Netfilter Architecture`_ +#. `Linux Foundation Networking Page`_ + +.. _Linux IP networking: http://www.cs.unh.edu/cnrg/gherrin/ +.. 
_The TUX Web Server: http://www.stllinux.org/meeting_notes/2001/0719/myTUX/ +.. _Beej's Guide to Network Programming Using Internet Sockets: http://beej.us/net2/bgnet.html +.. _Kernel Korner - Network Programming in the Kernel: http://www.linuxjournal.com/article/7660 +.. _Hacking the Linux Kernel Network Stack: http://phrack.org/issues/61/13.html +.. _The netfilter.org project: http://www.netfilter.org/ +.. _A Deep Dive Into Iptables and Netfilter Architecture: https://www.digitalocean.com/community/tutorials/a-deep-dive-into-iptables-and-netfilter-architecture +.. _Linux Foundation Networking Page: http://www.linuxfoundation.org/en/Net:Main_Page + +Exercises +========= + +.. include:: exercises-summary.hrst +.. |LAB_NAME| replace:: networking + +.. important:: + + You need to make sure that the ``netfilter`` support is active in kernel. It + is enabled via ``CONFIG_NETFILTER``. To activate it, run :command:`make menuconfig` in + the :file:`linux` directory and check the ``Network packet filtering framework + (Netfilter)`` option in ``Networking support -> Networking options``. If it + was not enabled, enable it (as builtin, not external module - it must be + marked with ``*``). + + +1. Displaying packets in kernel space +------------------------------------- + +Write a kernel module that displays the source address and port for TCP packets +that initiate an outbound connection. Start from the code in +:file:`1-2-netfilter` and fill in the areas marked with ``TODO 1``, taking into +account the comments below. + +You will need to register a netfilter hook of type ``NF_INET_LOCAL_OUT`` as explained +in the `netfilter`_ section. + +`The struct sk_buff structure`_ lets you access the packet headers using +specific functions. The :c:func:`ip_hdr` function returns the IP header as a +pointer to a :c:type:`struct iphdr` structure. The :c:func:`tcp_hdr` function +returns the TCP header as a pointer to a :c:type:`struct tcphdr` structure. 
+ +The `diagram`_ explains how to make a TCP connection. The connection initiation +packet has the ``SYN`` flag set in the TCP header and the ``ACK`` flag cleared. + +.. note:: + + To display the source IP address, use the ``%pI4`` format of the printk + function. Details can be found in the `kernel documentation + `_ (``IPv4 + addresses`` section). The following is an example code snippet that uses + ``%pI4``: + + .. code-block:: c + + printk("IP address is %pI4\n", &iph->saddr); + + When using the ``%pI4`` format, the argument to printk is a pointer. Hence the + construction ``&iph->saddr`` (with operator & - ampersand) instead of + ``iph->saddr``. + +The source TCP port is, in the TCP header, in the `network byte-order`_ format. +Read through the :ref:`Conversions` section. Use :c:func:`ntohs` to convert. + +For testing, use the :file:`1-2-netfilter/user/test-1.sh` file. The test creates +a connection to the localhost, a connection that will be intercepted and +displayed by the kernel module. The script is copied on the virtual machine by +the :command:`make copy` command only if it is marked as executable. The script +uses the statically compiled :command:`netcat` tool stored in +:file:`skels/networking/netcat`; this program must have execution +permissions. + +After running the checker the output should be similar to the one bellow: + +.. code-block:: c + + # ./test-1.sh + [ 229.783512] TCP connection initiated from 127.0.0.1:44716 + Should show up in filter. + Check dmesg output. + +2. Filtering by destination address +----------------------------------- + +Extend the module from exercise 1 so that you can specify a destination address +by means of a ``MY_IOCTL_FILTER_ADDRESS`` ioctl call. You'll only show packages +containing the specified destination address. To solve this task, fill in the +areas marked with ``TODO 2`` and follow the specifications below. + +To implement the ioctl routine, you must fill out the ``my_ioctl`` function. 
+Review the section in :ref:`ioctl`. The address sent from user space is in +`network byte-order`_, so there will be **NO need** for conversion. + +.. note:: + + The IP address sent via ``ioctl`` is sent by address, not by value. The + address must be stored in the ``ioctl_set_addr`` variable. For copying use + :c:func:`copy_from_user`. + +To compare the addresses, fill out the ``test_daddr`` function. Addresses in +network byte-order will be used without having to convert addresses (if they +are equal from left to right they will be equal if reversed too). + +The ``test_daddr`` function must be called from the netfilter hook to display +the connection initiation packets for which the destination address is the +one sent through the ioctl routine. The connection initiation packet has the +``SYN`` flag set in the TCP header and the ``ACK`` flag cleared. You have to +check two things: + + * the TCP flags; + * the destination address of the packet (using ``test_daddr``). + +For testing, use the :file:`1-2-netfilter/user/test-2.sh` script. This script +needs the :file:`1-2-netfilter/user/test.c` file to be compiled into the ``test`` +executable. Compilation is done automatically on the physical system when +running the :command:`make build` command. The test script is copied to the +virtual machine only if it is marked as executable. The script uses the +statically compiled :command:`netcat` tool in :file:`skels/networking/netcat`; +this executable must have execution permissions. + +After running the checker the output should be similar to the one below: + +.. code-block:: console + + # ./test-2.sh + [ 797.673535] TCP connection initiated from 127.0.0.1:44721 + Should show up in filter. + Should NOT show up in filter. + Check dmesg output. + +The test asks for packet filtering first for the ``127.0.0.1`` IP address and +then for the ``127.0.0.2`` IP address. 
The first connection initiation packet +(to ``127.0.0.1``) is intercepted and displayed by the filter, while the second +(to ``127.0.0.2``) is not intercepted. + +3. Listening on a TCP socket +---------------------------- + +Write a kernel module that creates a TCP socket that listens to connections on +port ``60000`` on the loopback interface (in ``init_module``). Start from the +code in :file:`3-4-tcp-sock` and fill in the areas marked with ``TODO 1``, taking +into account the observations below. + +Read the `Operations on the socket structure`_ and `The struct proto_ops +structure`_ sections. + +The ``sock`` socket is a ``server socket`` and must be put in the listening +state. That is, the ``bind`` and ``listen`` operations must be applied to the +socket. For the ``bind`` and ``listen`` equivalent, in kernel space you will +need to call ``sock->ops->...;`` examples of such functions you can call are +``sock->ops->bind``, ``sock->ops->listen`` etc. + +.. note:: + + For examples of calling the ``sock->ops->bind`` and ``sock->ops->listen`` functions, see + how they are called in the :c:func:`sys_bind` and :c:func:`sys_listen` system + call handlers. + + Look for the system call handlers in the ``net/socket.c`` file in the Linux + kernel source code tree. + +.. note:: + + For the second argument of the ``listen`` (backlog) call, use the + ``LISTEN_BACKLOG``. + +Remember to release the socket in the module's exit function and in the area +marked with error labels; use :c:func:`sock_release`. + +For testing, run the :file:`3-4-tcp-sock/test-3.sh` script. The script is +copied on the virtual machine by :command:`make copy` only if it is marked as +executable. + +After running the test, a TCP socket listening for connections on port +``60000`` will be displayed. + +4. 
Accepting connections in kernel space +---------------------------------------- + +Expand the module from the previous exercise to allow an external connection (no +need to send any message, only accept new connections). Fill in the areas marked +with ``TODO 2``. + +Read the `Operations on the socket structure`_ and `The struct proto_ops +structure`_ sections. + +For the kernel space ``accept`` equivalent, see the system call handler for +:c:func:`sys_accept4`. Follow the :c:func:`lnet_sock_accept` implementation, and +how the ``sock->ops->accept`` call is used. Use ``0`` as the value for the +second to last argument (``flags``), and ``true`` for the last argument +(``kern``). + +.. note:: + + Look for the system call handlers in the ``net/socket.c`` file in the Linux + kernel source code tree. + +.. note:: + + The new socket (``new_sock``) must be created with the + :c:func:`sock_create_lite` function and then its operations must be configured + using + + .. code-block:: c + + new_sock->ops = sock->ops; + +Print the address and port of the destination socket. To find the peer name of a +socket (its address), refer to the :c:func:`sys_getpeername` system call handler. + +.. note:: + + The first argument for the ``sock->ops->getname`` function will be the + connection socket, i.e. ``new_sock``, the one initialized by the ``accept`` + call. + + The last argument of the ``sock->ops->getname`` function will be ``1``, + meaning that we want to know about the endpoint or the peer (*remote end* or + *peer*). + + Display the peer address (indicated by the ``raddr`` variable) using the + ``print_sock_address`` macro defined in the file. + +Release the newly created socket (after accepting the connection) in the module +exit function and after the error label. After adding the ``accept`` code to the +module initialization function, the :command:`insmod` operation will block until +a connection is established. You can unblock it using :command:`netcat` on that +port. 
Consequently, the test script from the previous exercise will not work. + +For testing, run the :file:`3-4-tcp_sock/test-4.sh` script. The script is copied on +the virtual machine by :command:`make copy` only if it is marked as executable. + +Nothing special will be displayed (in the kernel buffer). The success of the +test will be defined by the connection establishment. Then use ``Ctrl+c`` to +stop the test script, and then you can remove the kernel module. + +5. UDP socket sender +-------------------- + +Write a kernel module that creates a UDP socket and sends the message from the +``MY_TEST_MESSAGE`` macro on the socket to the loopback address on port +``60001``. + +Start from the code in :file:`5-udp-sock`. + +Read the `Operations on the socket structure`_ and `The struct proto_ops +structure`_ sections. + +To see how to send messages in the kernel space, see the :c:func:`sys_send` +system call handler or `Sending/receiving messages`_. + +.. hint:: + + The ``msg_name`` field of the :c:type:`struct msghdr` structure must be + initialized to the destination address (pointer to :c:type:`struct sockaddr`) + and the ``msg_namelen`` field to the address size. + + Initialize the ``msg_flags`` field of the :c:type:`struct msghdr` structure + to ``0``. + + Initialize the ``msg_control`` and ``msg_controllen`` fields of the + :c:type:`struct msghdr` structure to ``NULL`` and ``0`` respectively. + +For sending the message use :c:func:`kernel_sendmsg`. + +The message transmission parameters are retrieved from the kernel space. Cast +the :c:type:`struct iovec` structure pointer to a :c:type:`struct kvec` pointer +in the :c:func:`kernel_sendmsg` call. + +.. hint:: + + The last two parameters of :c:func:`kernel_sendmsg` are ``1`` (number of I/O + vectors) and ``len`` (message size). + +For testing, use the :file:`test-5.sh` file. The script is copied on the virtual +machine by the :command:`make copy` command only if it is marked as executable. 
+The script uses the statically compiled ``netcat`` tool stored in +:file:`skels/networking/netcat`; this executable must have execution +permissions. + +For a correct implementation, running the :file:`test-5.sh` script will cause +the ``kernelsocket`` message to be displayed like in the output below: + +.. code-block:: console + + /root # ./test-5.sh + + pid=1059 + + sleep 1 + + nc -l -u -p 60001 + + insmod udp_sock.ko + kernelsocket + + rmmod udp_sock + + kill 1059 diff --git a/Documentation/teaching/labs/paging.png b/Documentation/teaching/labs/paging.png new file mode 100644 index 00000000000000..53f7fb18c41352 Binary files /dev/null and b/Documentation/teaching/labs/paging.png differ diff --git a/Documentation/teaching/labs/read.png b/Documentation/teaching/labs/read.png new file mode 100644 index 00000000000000..4502fb42271ab6 Binary files /dev/null and b/Documentation/teaching/labs/read.png differ diff --git a/Documentation/teaching/labs/read2.png b/Documentation/teaching/labs/read2.png new file mode 100644 index 00000000000000..6f04b13ee8ae0d Binary files /dev/null and b/Documentation/teaching/labs/read2.png differ diff --git a/Documentation/teaching/labs/write.png b/Documentation/teaching/labs/write.png new file mode 100644 index 00000000000000..d87abc99e511b4 Binary files /dev/null and b/Documentation/teaching/labs/write.png differ diff --git a/Documentation/teaching/labs/write2.png b/Documentation/teaching/labs/write2.png new file mode 100644 index 00000000000000..e533a36a515b96 Binary files /dev/null and b/Documentation/teaching/labs/write2.png differ diff --git a/Documentation/teaching/lectures/debugging.rst b/Documentation/teaching/lectures/debugging.rst new file mode 100644 index 00000000000000..1cf29e38674297 --- /dev/null +++ b/Documentation/teaching/lectures/debugging.rst @@ -0,0 +1,942 @@ +========= +Debugging +========= + +`View slides `_ + +.. 
slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +One essential part of Linux kernel development is debugging. In user space we had +the support of the kernel so we could easily stop processes and use gdb to inspect +their behavior. In the kernel, in order to use gdb we need to use a hypervisor like +QEMU or JTAG-based hardware interfaces, which are not always available. The Linux +kernel provides a set of tools and debug options useful for investigating abnormal +behavior. + +In this lecture we will learn about: + +.. slide:: Debugging + :inline-contents: True + :level: 2 + + * decoding an oops/panic + * list debugging + * memory debugging + * locking debugging + * profiling + +Decoding an oops/panic +====================== + +An oops is an inconsistent state that the kernel detects inside itself. +Upon detecting an oops the Linux kernel kills the offending process, +prints information that can help debug the problem and continues execution +but with limited reliability. + +Let's consider the following Linux kernel module: + +.. slide:: Oops module + :inline-contents: True + :level: 2 + + .. code-block:: c + + static noinline void do_oops(void) + { + *(int*)0x42 = 'a'; + } + + static int so2_oops_init(void) + { + pr_info("oops_init\n"); + do_oops(); + + return 0; + } + + static void so2_oops_exit(void) + { + pr_info("oops exit\n"); + } + + module_init(so2_oops_init); + module_exit(so2_oops_exit); + +Notice that the ``do_oops`` function tries to write at an invalid memory address. Because the kernel +cannot find a suitable physical page where to write, it kills the insmod task in the context of +which ``do_oops`` runs. Then it prints the following oops message: + + .. 
code-block:: bash + + root@qemux86:~/skels/debugging/oops# insmod oops.ko + BUG: unable to handle kernel NULL pointer dereference at 00000042 + IP: do_oops+0x8/0x10 [oops] + *pde = 00000000 + Oops: 0002 [#1] SMP + Modules linked in: oops(O+) + CPU: 0 PID: 234 Comm: insmod Tainted: G O 4.15.0+ #3 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 + EIP: do_oops+0x8/0x10 [oops] + EFLAGS: 00000292 CPU: 0 + EAX: 00000061 EBX: 00000000 ECX: c7ed3584 EDX: c7ece8dc + ESI: c716c908 EDI: c8816010 EBP: c7257df0 ESP: c7257df0 + DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 + CR0: 80050033 CR2: 00000042 CR3: 0785f000 CR4: 00000690 + Call Trace: + so2_oops_init+0x17/0x20 [oops] + do_one_initcall+0x37/0x170 + ? cache_alloc_debugcheck_after.isra.19+0x15f/0x2f0 + ? __might_sleep+0x32/0x90 + ? trace_hardirqs_on_caller+0x11c/0x1a0 + ? do_init_module+0x17/0x1c2 + ? kmem_cache_alloc+0xa4/0x1e0 + ? do_init_module+0x17/0x1c2 + do_init_module+0x46/0x1c2 + load_module+0x1f45/0x2380 + SyS_init_module+0xe5/0x100 + do_int80_syscall_32+0x61/0x190 + entry_INT80_32+0x2f/0x2f + EIP: 0x44902cc2 + EFLAGS: 00000206 CPU: 0 + EAX: ffffffda EBX: 08afb050 ECX: 0000eef4 EDX: 08afb008 + ESI: 00000000 EDI: bf914dbc EBP: 00000000 ESP: bf914c1c + DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b + Code: 42 00 00 00 5d c3 90 55 89 e5 83 ec 04 c7 04 24 24 70 81 c8 e8 + EIP: do_oops+0x8/0x10 [oops] SS:ESP: 0068:c7257df0 + CR2: 0000000000000042 + ---[ end trace 011848be72f8bb42 ]--- + Killed + +An oops contains information about the IP (instruction pointer) which caused the fault, the register status, +and the process and CPU on which the fault happened, as shown below: + +.. slide:: Oops information + :inline-contents: True + :level: 2 + + .. 
code-block:: bash + + root@qemux86:~/skels/debugging/oops# insmod oops.ko + BUG: unable to handle kernel NULL pointer dereference at 00000042 + IP: do_oops+0x8/0x10 [oops] + *pde = 00000000 + Oops: 0002 [#1] SMP + Modules linked in: oops(O+) + CPU: 0 PID: 234 Comm: insmod Tainted: G O 4.15.0+ #3 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 + EIP: do_oops+0x8/0x10 [oops] + CR0: 80050033 CR2: 00000042 CR3: 0785f000 CR4: 00000690 + EIP: 0x44902cc2 + EFLAGS: 00000206 CPU: 0 + EAX: ffffffda EBX: 08afb050 ECX: 0000eef4 EDX: 08afb008 + ESI: 00000000 EDI: bf914dbc EBP: 00000000 ESP: bf914c1c + DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b + Code: 42 00 00 00 5d c3 90 55 89 e5 83 ec 04 c7 04 24 24 70 81 c8 e8 + Killed + +Another important thing that an oops can provide is the stack trace of functions called before +the fault happened: + +.. slide:: Oops stacktrace + :inline-contents: True + :level: 2 + + + .. code-block:: bash + + root@qemux86:~/skels/debugging/oops# insmod oops.ko + BUG: unable to handle kernel NULL pointer dereference at 00000042 + Call Trace: + so2_oops_init+0x17/0x20 [oops] + do_one_initcall+0x37/0x170 + ? cache_alloc_debugcheck_after.isra.19+0x15f/0x2f0 + ? __might_sleep+0x32/0x90 + ? trace_hardirqs_on_caller+0x11c/0x1a0 + ? do_init_module+0x17/0x1c2 + ? kmem_cache_alloc+0xa4/0x1e0 + ? do_init_module+0x17/0x1c2 + do_init_module+0x46/0x1c2 + load_module+0x1f45/0x2380 + SyS_init_module+0xe5/0x100 + do_int80_syscall_32+0x61/0x190 + entry_INT80_32+0x2f/0x2f + Killed + +Decoding an oops +---------------- + +.. slide:: Debugging + :inline-contents: True + :level: 2 + + * CONFIG_DEBUG_INFO + * addr2line + * gdb + * objdump -dSr + +addr2line +--------- + +*addr2line* translates addresses into file names and line numbers. Given +an address in an executable it uses the debugging information to figure out +which file name and line number are associated with it. 
+ +Modules are loaded at dynamic addresses but are compiled starting with 0 as +a base address. So, in order to find the line number for a given dynamic address +we need to know the module's load address. + +.. slide:: addr2line + :inline-contents: True + :level: 2 + + .. code-block:: bash + + $ addr2line -e oops.o 0x08 + skels/debugging/oops/oops.c:5 + $ # 0x08 is the offset of the offending instruction inside the oops.ko module + +objdump +------- + +Similarly, we can determine the offending line using objdump: + +.. slide:: objdump + :inline-contents: True + :level: 2 + + .. code-block:: bash + + $ cat /proc/modules + oops 20480 1 - Loading 0xc8816000 (O+) + + $ objdump -dS --adjust-vma=0xc8816000 oops.ko + c8816000: b8 61 00 00 00 mov $0x61,%eax + + static noinline void do_oops(void) + { + c8816005: 55 push %ebp + c8816006: 89 e5 mov %esp,%ebp + *(int*)0x42 = 'a'; + c8816008: a3 42 00 00 00 mov %eax,0x42 + +gdb +--- + +.. slide:: gdb + :inline-contents: True + :level: 2 + + .. code-block:: bash + + $ gdb ./vmlinux + + (gdb) list *(do_panic+0x8) + 0xc1244138 is in do_panic (lib/test_panic.c:8). + 3 + 4 static struct timer_list panic_timer; + 5 + 6 static void do_panic(struct timer_list *unused) + 7 { + 8 *(int*)0x42 = 'a'; + 9 } + 10 + 11 static int so2_panic_init(void) + +Kernel panic +------------ + +A kernel panic is a special type of oops where the kernel cannot continue execution. For example, +if the function ``do_oops`` from above was called in the interrupt context, the kernel wouldn't know which process to kill, +so it will decide that it is better to crash the kernel and stop execution. + +Here is some sample code that will generate a kernel panic: + +.. slide:: Kernel panic + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + static struct timer_list panic_timer; + + static void do_panic(struct timer_list *unused) + { + *(int*)0x42 = 'a'; + } + + static int so2_panic_init(void) + { + pr_info("panic_init\n"); + + timer_setup(&panic_timer, do_panic, 0); + mod_timer(&panic_timer, jiffies + 2 * HZ); + + return 0; + } + +Loading the module will generate the following kernel panic message: + +.. code-block:: bash + + root@qemux86:~/skels/debugging/panic# insmod panic.ko + panic: loading out-of-tree module taints kernel. + panic_init + root@qemux86:~/skels/debugging/panic# BUG: unable to handle kernel NULL pointer dereference at 00000042 + IP: do_panic+0x8/0x10 [panic] + *pde = 00000000 + Oops: 0002 [#1] SMP + Modules linked in: panic(O) + CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 4.15.0+ #19 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 + EIP: do_panic+0x8/0x10 [panic] + EFLAGS: 00010246 CPU: 0 + EAX: 00000061 EBX: 00000101 ECX: 000002d8 EDX: 00000000 + ESI: c8817000 EDI: c8819200 EBP: c780ff34 ESP: c780ff34 + DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 + CR0: 80050033 CR2: 00000042 CR3: 0716b000 CR4: 00000690 + Call Trace: + + call_timer_fn+0x63/0xf0 + ? process_timeout+0x10/0x10 + run_timer_softirq+0x14f/0x170 + ? 0xc8817000 + ? trace_hardirqs_on_caller+0x9b/0x1a0 + __do_softirq+0xde/0x1f2 + ? __irqentry_text_end+0x6/0x6 + do_softirq_own_stack+0x57/0x70 + + irq_exit+0x7d/0x90 + smp_apic_timer_interrupt+0x4f/0x90 + ? 
trace_hardirqs_off_thunk+0xc/0x1d + apic_timer_interrupt+0x3a/0x40 + EIP: default_idle+0xa/0x10 + EFLAGS: 00000246 CPU: 0 + EAX: c15c97c0 EBX: 00000000 ECX: 00000000 EDX: 00000001 + ESI: 00000000 EDI: 00000000 EBP: c15c3f48 ESP: c15c3f48 + DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 + arch_cpu_idle+0x9/0x10 + default_idle_call+0x19/0x30 + do_idle+0x105/0x180 + cpu_startup_entry+0x25/0x30 + rest_init+0x1e3/0x1f0 + start_kernel+0x305/0x30a + i386_start_kernel+0x95/0x99 + startup_32_smp+0x15f/0x164 + Code: 42 00 00 00 5d c3 90 55 89 e5 83 ec 08 c7 04 24 24 80 81 c8 e8 + EIP: do_panic+0x8/0x10 [panic] SS:ESP: 0068:c780ff34 + CR2: 0000000000000042 + ---[ end trace 77f49f83f2e42f91 ]--- + Kernel panic - not syncing: Fatal exception in interrupt + Kernel Offset: disabled + ---[ end Kernel panic - not syncing: Fatal exception in interrupt + + +List debugging +============== + +In order to catch access to uninitialized elements the kernel uses poison +magic values. + +.. slide:: List debugging + :inline-contents: True + :level: 2 + + .. code-block:: bash + + static inline void list_del(struct list_head *entry) + { + __list_del(entry->prev, entry->next); + entry->next = (struct list_head*)LIST_POISON1; + entry->prev = (struct list_head*)LIST_POISON2; + } + + BUG: unable to handle kernel NULL pointer dereference at 00000100 + IP: crush+0x80/0xb0 [list] + +Memory debugging +================ + +There are several tools for memory debugging: + +.. slide:: Memory debugging + :inline-contents: True + :level: 2 + + * SLAB/SLUB debugging + * KASAN + * kmemcheck + * DEBUG_PAGEALLOC + +Slab debugging +--------------- + +Slab debugging uses a memory poison technique to detect several types of memory +bugs in the SLAB/SLUB allocators. + +The allocated buffers are guarded with memory that has been filled in with +special markers. Any adjacent writes to the buffer will be detected at a later +time when other memory management operations on that buffer are performed +(e.g. 
when the buffer is freed). + +Upon allocation of the buffer, the buffer is also filled in with a special +value to potentially detect buffer access before initialization (e.g. if the +buffer holds pointers). The value is selected in such a way that it is unlikely +to be a valid address and as such to trigger kernel bugs at the access time. + +A similar technique is used when freeing the buffer: the buffer is filled with +another special value that will cause kernel bugs if pointers are accessed after +the memory is freed. In this case, the allocator also checks the next time the +buffer is allocated that the buffer was not modified. + +The diagram below shows a summary of the way SLAB/SLUB poisoning works: + + +.. slide:: Slab debugging + :inline-contents: True + :level: 2 + + * CONFIG_DEBUG_SLAB + * poison based memory debuggers + + .. ditaa:: + +--------------+-----------------------+--------------+ + | cF88 | c8F8 | cF88 | + | Buffer | Allocated buffer | Buffer | + | Underflow | 0x5a5a5a5a | Overflow | + | Poison | 0x5a5a5a5a | Poison | + | | 0x5a5a5a5a | | + +--------------+-----------------------+--------------+ + + +--------------+-----------------------+--------------+ + | cF88 | c888 | cF88 | + | Buffer | Freed buffer | Buffer | + | Underflow | 0x6b6b6b6b | Overflow | + | Poison | 0x6b6b6b6b | Poison | + | | 0x6b6b6b6b | | + +--------------+-----------------------+--------------+ + + +Example of a use before initialize bug: + +.. slide:: Use before initialize bugs + :inline-contents: True + :level: 2 + + :: + + BUG: unable to handle kernel paging request at 5a5a5a5a + IP: [] __list_del_entry+0x37/0x71 + … + Call Trace: + [] list_del+0xb/0x1b + [] use_before_init+0x31/0x38 [crusher] + [] crush_it+0x38/0xa9 [crusher] + [] init_module+0x8/0xa [crusher] + [] do_one_initcall+0x72/0x119 + [] ? crush_it+0xa9/0xa9 [crusher] + [] sys_init_module+0xc8d/0xe77 + [] syscall_call+0x7/0xb + + .. 
code-block:: c + + noinline void use_before_init(void) + { + struct list_m *m = kmalloc(sizeof(*m), GFP_KERNEL); + + printk("%s\n", __func__); + list_del(&m->lh); + } + +Example of an use after free bug: + +.. slide:: Use after free bug + :inline-contents: True + :level: 2 + + :: + + BUG: unable to handle kernel paging request at 6b6b6b6b + IP: [] __list_del_entry+0x37/0x71 + … + Call Trace: + [] list_del+0xb/0x1b + [] use_after_free+0x38/0x3f [crusher] + [] crush_it+0x52/0xa9 [crusher] + [] init_module+0x8/0xa [crusher] + [] do_one_initcall+0x72/0x119 + [] ? crush_it+0xa9/0xa9 [crusher] + [] sys_init_module+0xc8d/0xe77 + [] syscall_call+0x7/0xb + + .. code-block:: c + + noinline void use_after_free(void) + { + struct list_m *m = kmalloc(sizeof(*m), GFP_KERNEL); + + printk("%s\n", __func__); + kfree(m); + list_del(&m->lh); + } + +Another example of an use after free bug is shown below. Note that this time the +bug is detected at the next allocation. + +.. slide:: Use after free bug + :inline-contents: True + :level: 2 + + :: + + # insmod /system/lib/modules/crusher.ko test=use_before_init + Slab corruption: size-4096 start=ed612000, len=4096 + 000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 6b 6b + + .. code-block:: c + + noinline void use_after_free2(void) + { + char *b = kmalloc(3000, GFP_KERNEL); + kfree(b); + memset(b, 0, 30); + b = kmalloc(3000, GFP_KERNEL); + kfree(b); + } + +Finally this is an example of a buffer overflow bug: + +.. 
slide:: Buffer overflow bugs + :inline-contents: True + :level: 2 + + :: + + slab error in verify_redzone_free(): cache `dummy': memory outside object was overwritten + Pid: 1282, comm: insmod Not tainted 3.0.16-mid10-00007-ga4a6b62-dirty #70 + Call Trace: + [] __slab_error+0x17/0x1c + [] __cache_free+0x12c/0x317 + [] kmem_cache_free+0x2b/0xaf + [] buffer_overflow+0x4c/0x57 [crusher] + [] crush_it+0x6c/0xa9 [crusher] + [] init_module+0x8/0xd [crusher] + [] do_one_initcall+0x72/0x119 + [] sys_init_module+0xc8d/0xe77 + [] syscall_call+0x7/0xb + eb002bf8: redzone 1:0xd84156c5635688c0, redzone 2:0x0 + + .. code-block:: c + + noinline void buffer_overflow(void) + { + struct kmem_cache *km = kmem_cache_create("dummy", 3000, 0, 0, NULL); + char *b = kmem_cache_alloc(km, GFP_KERNEL); + + printk("%s\n", __func__); + memset(b, 0, 3016); + kmem_cache_free(km, b); + } + + +DEBUG_PAGEALLOC +--------------- + +.. slide:: DEBUG_PAGEALLOC + :inline-contents: True + :level: 2 + + * Memory debugger that works at a page level + * Detects invalid accesses either by: + + * Filling pages with poison byte patterns and checking the pattern at + reallocation + * Unmapping the deallocated pages from kernel space (just a few + architectures) + + +KASan +----- + +KASan is a dynamic memory error detector designed to find use-after-free +and out-of-bounds bugs. + +The main idea of KASAN is to use shadow memory to record whether each byte +of memory is safe to access or not, and use the compiler's instrumentation to +check the shadow memory on each memory access. + +Address sanitizer uses 1 byte of shadow memory to track 8 bytes of kernel +address space. It uses 0-7 to encode the number of consecutive bytes at +the beginning of the eight-byte region that are valid. + +See `The Kernel Address Sanitizer (KASAN)` for more information and have a look +at lib/test_kasan.c for an example of problems that KASan can detect. + +.. 
slide:: KASan + :inline-contents: True + :level: 2 + + * dynamic memory error detector + * finds use-after-free or out-of-bounds bugs + * uses shadow memory to track memory operations + * lib/test_kasan.c + + +KASan vs DEBUG_PAGEALLOC +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. slide:: KASan vs DEBUG_PAGEALLOC + :inline-contents: True + :level: 2 + + KASan is slower than DEBUG_PAGEALLOC, but KASan works on sub-page granularity + level, so it is able to find more bugs. + + +KASan vs SLUB_DEBUG +~~~~~~~~~~~~~~~~~~~ + +.. slide:: KASan vs SLUB_DEBUG + :inline-contents: True + :level: 2 + + * SLUB_DEBUG has lower overhead than KASan. + * SLUB_DEBUG in most cases is not able to detect bad reads, KASan is able to + detect both reads and writes. + * In some cases (e.g. redzone overwritten) SLUB_DEBUG detects bugs only on + allocation/freeing of the object. KASan catches bugs right before they happen, + so we always know the exact place of the first bad read/write. + + +Kmemleak +-------- + +Kmemleak provides a way of detecting kernel memory leaks in a way similar to a +tracing garbage collector. Since tracing pointers is not possible in C, kmemleak +scans the kernel stacks as well as dynamically and statically allocated kernel memory for +pointers to allocated buffers. A buffer for which there is no pointer is +considered as leaked. The basic steps to use kmemleak are presented below; for +more information see `Kernel Memory Leak Detector` + + +.. slide:: Kmemleak + :inline-contents: True + :level: 2 + + * enable kernel config: `CONFIG_DEBUG_KMEMLEAK` + * setup: `mount -t debugfs nodev /sys/kernel/debug` + * trigger a memory scan: `echo scan > /sys/kernel/debug/kmemleak` + * show memory leaks: `cat /sys/kernel/debug/kmemleak` + * clear all possible leaks: `echo clear > /sys/kernel/debug/kmemleak` + +As an example, let's look at the following simple module: + +.. slide:: Kmemleak example + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + static int leak_init(void) + { + pr_info("%s\n", __func__); + + (void)kmalloc(16, GFP_KERNEL); + + return 0; + } + + MODULE_LICENSE("GPL v2"); + module_init(leak_init); + +Loading the module and triggering a kmemleak scan will issue the +following report: + +.. slide:: Kmemleak report + :inline-contents: True + :level: 2 + + :: + + root@qemux86:~# insmod skels/debugging/leak/leak.ko + leak: loading out-of-tree module taints kernel. + leak_init + root@qemux86:~# echo scan > /sys/kernel/debug/kmemleak + root@qemux86:~# echo scan > /sys/kernel/debug/kmemleak + kmemleak: 1 new suspected memory leaks (see /sys/kernel/debug/kmemleak) + root@qemux86:~# cat /sys/kernel/debug/kmemleak + unreferenced object 0xd7871500 (size 32): + comm "insmod", pid 237, jiffies 4294902108 (age 24.628s) + hex dump (first 32 bytes): + 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ + 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a a5 ZZZZZZZZZZZZZZZ. + backtrace: + [<(ptrval)>] kmem_cache_alloc_trace+0x163/0x310 + [<(ptrval)>] leak_init+0x2f/0x1000 [leak] + [<(ptrval)>] do_one_initcall+0x57/0x2e0 + [<(ptrval)>] do_init_module+0x4b/0x1be + [<(ptrval)>] load_module+0x201a/0x2590 + [<(ptrval)>] sys_init_module+0xfd/0x120 + [<(ptrval)>] do_int80_syscall_32+0x6a/0x1a0 + + +.. note:: Notice that we did not have to unload the module to detect the memory + leak since kmemleak detects that the allocated buffer is not + reachable anymore. + + +Lockdep checker +=============== + +.. slide:: Lockdep checker + :inline-contents: True + :level: 2 + + * CONFIG_DEBUG_LOCKDEP + * Detects lock inversion, circular dependencies, incorrect usage of locks + (including interrupt context) + * Maintains dependency between classes of locks not individual locks + * Each scenario is only checked once and hashed + + +Let's take for example the following kernel module that runs two kernel threads: + +.. slide:: AB BA Deadlock Example + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + static noinline int thread_a(void *unused) + { + mutex_lock(&a); pr_info("%s acquired A\n", __func__); + mutex_lock(&b); pr_info("%s acquired B\n", __func__); + + mutex_unlock(&b); + mutex_unlock(&a); + + return 0; + } + + .. code-block:: c + + static noinline int thread_b(void *unused) + { + mutex_lock(&b); pr_info("%s acquired B\n", __func__); + mutex_lock(&a); pr_info("%s acquired A\n", __func__); + + mutex_unlock(&a); + mutex_unlock(&b); + + return 0; + } + + +Loading this module with lockdep checker active will produce the following +kernel log: + +.. slide:: AB BA Deadlock Report + :inline-contents: True + :level: 2 + + :: + + thread_a acquired A + thread_a acquired B + thread_b acquired B + + ====================================================== + WARNING: possible circular locking dependency detected + 4.19.0+ #4 Tainted: G O + ------------------------------------------------------ + thread_b/238 is trying to acquire lock: + (ptrval) (a){+.+.}, at: thread_b+0x48/0x90 [locking] + + but task is already holding lock: + (ptrval) (b){+.+.}, at: thread_b+0x27/0x90 [locking] + + which lock already depends on the new lock. + + +As you can see, although the deadlock condition did not trigger (because thread +A did not complete execution before thread B started execution) the lockdep +checker identified a potential deadlock scenario. + +Lockdep checker will provide even more information to help determine what caused +the deadlock, like the dependency chain: + +.. 
slide:: AB BA Deadlock Report (dependency chain) + :inline-contents: True + :level: 2 + + :: + + the existing dependency chain (in reverse order) is: + + -> #1 (b){+.+.}: + __mutex_lock+0x60/0x830 + mutex_lock_nested+0x20/0x30 + thread_a+0x48/0x90 [locking] + kthread+0xeb/0x100 + ret_from_fork+0x2e/0x38 + + -> #0 (a){+.+.}: + lock_acquire+0x93/0x190 + __mutex_lock+0x60/0x830 + mutex_lock_nested+0x20/0x30 + thread_b+0x48/0x90 [locking] + kthread+0xeb/0x100 + ret_from_fork+0x2e/0x38 + +and even an unsafe locking scenario: + +.. slide:: AB BA Deadlock Report (unsafe locking scenario) + :inline-contents: True + :level: 2 + + :: + + other info that might help us debug this: + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(b); + lock(a); + lock(b); + lock(a); + + *** DEADLOCK *** + + +Another example of unsafe locking issues that lockdep checker detects +is unsafe locking from interrupt context. Lets consider the following +kernel module: + +.. slide:: IRQ Deadlock Example + :inline-contents: True + :level: 2 + + .. code-block:: c + + static DEFINE_SPINLOCK(lock); + + static void timerfn(struct timer_list *unused) + { + pr_info("%s acquiring lock\n", __func__); + spin_lock(&lock); pr_info("%s acquired lock\n", __func__); + spin_unlock(&lock); pr_info("%s released lock\n", __func__); + } + + static DEFINE_TIMER(timer, timerfn); + + int init_module(void) + { + mod_timer(&timer, jiffies); + + pr_info("%s acquiring lock\n", __func__); + spin_lock(&lock); pr_info("%s acquired lock\n", __func__); + spin_unlock(&lock); pr_info("%s released lock\n", __func__); + return 0; + } + + +As in the previous case, loading the module will trigger a lockdep +warning: + +.. 
slide:: IRQ Deadlock Report + :inline-contents: True + :level: 2 + + :: + + init_module acquiring lock + init_module acquired lock + init_module released lock + timerfn acquiring lock + + ================================ + WARNING: inconsistent lock state + 4.19.0+ #4 Tainted: G O + -------------------------------- + inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. + ksoftirqd/0/9 [HC0[0]:SC1[1]:HE1:SE0] takes: + (ptrval) (lock#4){+.?.}, at: timerfn+0x25/0x60 [locking2] + {SOFTIRQ-ON-W} state was registered at: + lock_acquire+0x93/0x190 + _raw_spin_lock+0x39/0x50 + init_module+0x35/0x70 [locking2] + do_one_initcall+0x57/0x2e0 + do_init_module+0x4b/0x1be + load_module+0x201a/0x2590 + sys_init_module+0xfd/0x120 + do_int80_syscall_32+0x6a/0x1a0 + restore_all+0x0/0x8d + + +The warning will also provide additional information and a potential unsafe +locking scenario: + +.. slide:: IRQ Deadlock Report + :inline-contents: True + :level: 2 + + :: + + Possible unsafe locking scenario: + + CPU0 + ---- + lock(lock#4); + + lock(lock#4); + + *** DEADLOCK *** + + 1 lock held by ksoftirqd/0/9: + #0: (ptrval) (/home/tavi/src/linux/tools/labs/skels/./debugging/locking2/locking2.c:13){+.-.}, at: call_timer_f0 + stack backtrace: + CPU: 0 PID: 9 Comm: ksoftirqd/0 Tainted: G O 4.19.0+ #4 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 + Call Trace: + dump_stack+0x66/0x96 + print_usage_bug.part.26+0x1ee/0x200 + mark_lock+0x5ea/0x640 + __lock_acquire+0x4b4/0x17a0 + lock_acquire+0x93/0x190 + _raw_spin_lock+0x39/0x50 + timerfn+0x25/0x60 [locking2] + + +perf +==== + +.. slide:: perf + :inline-contents: True + :level: 2 + + * performance counters, tracepoints, kprobes, uprobes + * hardware events: CPU cycles, TLB misses, cache misses + * software events: page faults, context switches + * collects backtraces (user + kernel) + +Other tools +=========== + +.. 
slide:: Other tools + :inline-contents: True + :level: 2 + + * ftrace + * kprobes + * sparse + * coccinelle + * checkpatch.pl + * printk + * dump_stack() diff --git a/Documentation/teaching/lectures/interrupts.rst b/Documentation/teaching/lectures/interrupts.rst new file mode 100644 index 00000000000000..db27d3714badbc --- /dev/null +++ b/Documentation/teaching/lectures/interrupts.rst @@ -0,0 +1,733 @@ +========== +Interrupts +========== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives +================== + +.. slide:: Interrupts + :inline-contents: True + :level: 2 + + * Interrupts and exceptions (x86) + + * Interrupts and exceptions (Linux) + + * Deferrable work + + * Timers + +What is an interrupt? +===================== + +An interrupt is an event that alters the normal execution flow of a program and +can be generated by hardware devices or even by the CPU itself. + +Interrupts can be grouped into two categories based on the source of the interrupt: + +.. slide:: Interrupts + :inline-contents: True + :level: 2 + + * **synchronous**, generated by executing an instruction + * **asynchronous**, generated by an external event + * **maskable** + + * can be ignored + * signalled via INT pin + * **non-maskable** + + * cannot be ignored + * signalled via NMI pin + +Synchronous interrupts, usually named exceptions, handle conditions detected by the +processor itself in the course of executing an instruction. Divide by zero or +a system call are examples of exceptions. + +Asynchronous interrupts, usually named interrupts, are external events generated +by I/O devices. For example, a network card generates an interrupt to signal +that a packet has arrived. + +Exceptions +---------- + +There are two sources for exceptions: + +.. 
slide:: Exceptions + :inline-contents: True + :level: 2 + + * processor detected + + - **faults** + - **traps** + - **aborts** + * programmed + + + - **int n** + +Processor detected exceptions are raised when an abnormal condition is +detected while executing an instruction. + +A fault is a type of exception that is reported before the execution of the +instruction and can usually be corrected. The saved EIP is the address of +the instruction that caused the fault, so after the fault is corrected +the program can re-execute the faulty instruction. (e.g. page fault). + +A trap is a type of exception that is reported after the execution of the +instruction in which the exception was detected. The saved EIP is the address +of the instruction after the instruction that caused the trap. (e.g. debug trap). + +Hardware Concepts +================= + +Programmable Interrupt Controller +--------------------------------- + +.. slide:: Hardware (PIC) + :inline-contents: True + :level: 2 + + .. ditaa:: + + +-----------+ NMI + | | + | |<----------+ + | | + | | +------------+ + | | | | IRQ0 + | | | |<------------+ device0 + | CPU | | | IRQ1 + | | INTR | PIC |<------------+ device1 + | |<----------+ | IRQN + | | | |<------------+ deviceN + | | | | + +-----------+ +------------+ + +A device supporting interrupts has an output pin used for signalling an Interrupt ReQuest. IRQ +pins are connected to a device named Programmable Interrupt Controller (PIC) which is connected +to CPU's INTR pin. + +A PIC usually has a set of ports used to exchange information with the CPU. 
When a device +connected to one of the PIC's IRQ lines needs CPU attention the following flow happens: + + * device raises an interrupt on the corresponding IRQn pin + * PIC converts the IRQ into a vector number and writes it to a port for CPU to read + * PIC raises an interrupt on CPU INTR pin + * PIC waits for CPU to acknowledge an interrupt + * CPU handles the interrupt + +We will see later how the CPU handles the interrupt. It is important to notice that, by design, the PIC won't raise +another interrupt until the CPU has acknowledged the current interrupt. + +Each IRQ line can be individually disabled. This allows simplifying design by making sure that +interrupt handlers are always executed serially. + +Advanced Programmable Interrupt Controller +------------------------------------------ + +.. slide:: Hardware (APIC) + :inline-contents: True + :level: 2 + + .. ditaa:: + + + CPU0 CPU1 + +-------------+ +-------------+ + | | | | + | |local IRQs | |local IRQs + | +---------- | +---------- + | | | | + | local APIC | | local APIC | + | | LINT0, LINT1 | | LINT0, LINT1 + | +------------- | +------------- + | | | | + +-------+-----+ +------+------+ + | | + | | + | | + +-------+--------------------------------+------+ + | | + | Interrupt Controller Communication BUS | + +----------------------+------------------------+ + | + | + +--------+--------+ + | | + | I/O APIC | + | | + +--------+--------+ + | + | + | + External interrupts + +With multicore systems, each core has a local APIC used to process interrupts +from locally connected devices like timers or thermal sensors. + +I/O APIC is used to distribute IRQs from external devices to CPU cores. + +After discussing the hardware, now let's see how the processor handles an interrupt. 
+ +Interrupt Control +----------------- + +In order to synchronize access to shared data between the interrupt handler +and other potential concurrent activities such as driver initialization or +driver data processing, it is often required to enable and disable interrupts in +a controlled fashion. + +This can be accomplished at several levels: + +.. slide:: Enabling/disabling the interrupts + :inline-contents: True + :level: 2 + + * at the device level + + * by programming the device control registers + + * at the PIC level + + * PIC can be programmed to disable a given IRQ line + + * at the CPU level; for example, on x86 one can use the following + instructions: + + * cli (CLear Interrupt flag) + * sti (SeT Interrupt flag) + + +Architecture specific interrupt handling in Linux +================================================= + +In this section we will discuss how Linux handles interrupts for the x86 architecture. + +Interrupt Descriptor Table +-------------------------- + +The interrupt descriptor table (IDT) associates each interrupt or exception +identifier with a descriptor for the instructions that service the associated +event. We will name the identifier as vector number and the associated +instructions as interrupt/exception handler. + +An IDT has the following characteristics: + +.. slide:: Interrupt Descriptor Table + :inline-contents: True + :level: 2 + + * it is used as a jump table by the CPU when a given vector is triggered + * it is an array of 256 x 8 bytes entries + * may reside anywhere in physical memory + * processor locates IDT by the means of IDTR + +Below we can find the Linux IRQ vector layout. The first 32 entries are reserved +for exceptions, vector 128 is used for the syscall interface and the rest are +used mostly for hardware interrupt handlers. + +.. slide:: Linux IRQ vector layout + :inline-contents: True + :level: 2 + + .. 
ditaa:: + + arch/x86/include/asm/irq_vectors.h + +------+ + | 0 | 0..31, system traps and exceptions + +------+ + | 1 | + +------+ + | | + +------+ + | | + | | + | | + +------+ + | 32 | 32..127, device interrupts + +------+ + | | + | | + | | + +------+ + | 128 | int80 syscall interface + +------+ + | 129 | 129..255, other interrupts + +------+ + | | + | | + | | + +------+ + | 255 | + +------+ + +On x86 an IDT entry has 8 bytes and it is named gate. There can be 3 types of gates: + + * interrupt gate, holds the address of an interupt or exception handler. + Jumping to the handler disables maskable interrupts (IF flag is cleared). + * trap gates, similar with an interrupt gate but it does not disable maskable + interrupts while jumping to interupt/exception handler. + * task gates (not used in Linux) + +Lets have a look at several fields of an IDT entry: + + * segment selector, index into GDT/LDT to find the start of the code segment where + the interupt handlers resides + * offset, offset inside the code segment + * T, represents the type of gate + * DPL, minimum privilege required for using the segments content. + +.. slide:: Interrupt descriptor table entry (gate) + :inline-contents: True + :level: 2 + + .. ditaa:: + + 63 47 42 32 + +------------------------------+---+---+----+---+---------------+ + | | | D | | | | + | offset (16..31 | P | P | | T | | + | | | L | | | | + +------------------------------+---+---+----+---+---------------+ + | | | + | segment selector | offset (0..15) | + | | | + +------------------------------+--------------------------------+ + 31 15 0 + + +Interrupt handler address +------------------------- + +In order to find the interrupt handler address we first need to find the start +address of the code segment where interrupt handler resides. For this we +use the segment selector to index into GDT/LDT where we can find the corresponding +segment descriptor. This will provide the start address kept in the 'base' field. 
+Using base address and the offset we can now go at the start of the interrupt handler. + + +.. slide:: Interrupt handler address + :inline-contents: True + :level: 2 + + .. ditaa:: + + + Interrupt Descriptor + +----------------------------------------------+ + | | + | +------------------+ +--------+ +------+ | + | | segment selector | | offset| | PL | | + | +----+-------------+ +---+----+ +------+ | + | | | | + +----------------------------------------------+ + | | + | | + +-------------+ +----------------------------> +---------------+ + | ^ | ISR address | + | Segment Descriptor | +---------------+ + | +----------------------------------------------+ | + | | | | + +---->| +------------------+ +--------+ +------+ | | + | | base | | limit | | PL | | | + | +---------+--------+ +--------+ +------+ | | + | | | | + +----------------------------------------------+ | + | | + +--------------------------------------------+ + + +Stack of interrupt handler +-------------------------- + +Similar with control transfer to a normal function, a control transfer +to an interrupt or exception handler uses the stack to store the +information needed for returning to the interrupted code. + +As can be seen in the figure below, an interrupt pushes the EFLAGS register +before saving the address of the interrupted instruction. Certain types +of exceptions also cause an error code to be pushed on the stack to help +debug the exception. + + +.. slide:: Interrupt handler stack + :inline-contents: True + :level: 2 + + .. 
ditaa:: + + + w/o privilege transition w/ privilege transition + + + +---------------------+ +---------------------+ + | | | | | + | | | OLD SS:ESP | OLD SS | NEW SS:ESP from TSS + | +---------------------+ +---------------------+ + | | | | | + | | OLD EFLAGS | | OLD ESP | + | +---------------------+ +---------------------+ + | | | | | + | | OLD CS | | OLD EFLAGS | + | +---------------------+ +---------------------+ + | | | | | + | | OLD EIP | | OLD CS | + | +---------------------+ +---------------------+ + | | | | | + | | (error code) | NEW SS:ESP | OLD EIP | + | +---------------------+ +---------------------+ + | | | | | + | | | | (error code) | NEW SS:ESP + | | | +---------------------+ + | | | | | + | | | | | + | | | | | + | | | | | + | | | | | + | | | | | + | | | | | + v +---------------------+ +---------------------+ + + +Handling an interrupt request +----------------------------- + +After an interrupt request has been generated the processor runs a sequence of +events that eventually ends up with running the kernel interrupt handler: + + +.. slide:: Handling an interrupt request + :inline-contents: True + :level: 2 + + + * CPU checks the current privilege level + * if need to change privilege level + + * change stack with the one associated with new privilege + * save old stack information on the new stack + + * save EFLAGS, CS, EIP on stack + * save error code on stack in case of an abort + * execute the kernel interrupt handler + +Returning from an interrupt handler +----------------------------------- + +Most architectures offers special instructions to clean-up the stack and resume +the execution after the interrupt handler has been executed. On x86 IRET is used +to return from an interrupt handler. IRET is similar with RET except that IRET +increments ESP by extra four bytes (because of the flags on stack) and moves the +saved flags into EFLAGS register. + +To resume the execution after an interrupt the following sequence is used (x86): + +.. 
slide:: Returning from an interrupt + :inline-contents: True + :level: 2 + + * pop the error code (in case of an abort) + * call IRET + + * pops values from the stack and restores the following registers: CS, EIP, EFLAGS + * if privilege level changed returns to the old stack and old privilege level + + +Generic interrupt handling in Linux +=================================== + +In Linux the interrupt handling is done in three phases: critical, immediate and +deferred. + +In the first phase the kernel will run the generic interrupt handler that +determines the interrupt number, the interrupt handler for this particular +interrupt and the interrupt controller. At this point any timing critical +actions will also be performed (e.g. acknowledge the interrupt at the interrupt +controller level). Local processor interrupts are disabled for the duration of +this phase and continue to be disabled in the next phase. + +In the second phase all of the device driver handlers associated with this +interrupt will be executed [#f1]_. At the end of this phase the interrupt controller's +"end of interrupt" method is called to allow the interrupt controller to +reassert this interrupt. The local processor interrupts are enabled at this +point. + +.. [#f1] Note that it is possible that one interrupt is associated with multiple + devices and in this case it is said that the interrupt is + shared. Usually, when using shared interrupts it is the responsibility + of the device driver to determine if the interrupt is targeted at its + device or not. + +Finally, in the last phase of interrupt handling interrupt context deferrable +actions will be run. These are also sometimes known as "bottom half" of the +interrupt (the upper half being the part of the interrupt handling that runs +with interrupts disabled). At this point interrupts are enabled on the local +processor. + +.. slide:: Interrupt handling in Linux + :inline-contents: True + :level: 2 + + .. 
ditaa:: + + + phase 1 + +----------------+ + | critical | phase 2 + +----------------+ +-----------------+ + | | | immediate | phase 3 + | - IRQ disabled | +-----------------+ +----------------+ + | - ACK IRQ +-----+ | | | deferred | + | | +---> - IRQ disabled | +----------------+ + +----------------+ | - device handler| | | + | - EOI IRQ +-----+ | - IRQ enabled | + +-----------------+ +----> - execute later| + | | + +----------------+ + + +Nested interrupts and exceptions +-------------------------------- + +Nesting interrupts is permitted on many architectures. Some architectures define +interrupt levels that allow preemption of an interrupt only if the pending +interrupt has a greater priority then the current (settable) level (e.g see +ARM's priority mask). + +In order to support as many architectures as possible, Linux has a more +restrictive interrupt nesting implementation: + +.. slide:: IRQ nesting in Linux + :inline-contents: True + :level: 2 + + * an exception (e.g. page fault, system call) can not preempt an interrupt; + if that occurs it is considered a bug + + * an interrupt can preempt an exception or other interrupts; however, only + one level of interrupt nesting is allowed + +The diagram below shows the possible nesting scenarios: + +.. slide:: Interrupt/Exception nesting + :inline-contents: True + :level: 2 + + .. ditaa:: + + + ^ + ^ + | | | | + | Syscall | | IRQi | + User Mode | Exception (e.g. page fault) | | | + | | | | + | | | | + +-------------------------------------------------------+-----------+-- + | iret| | | + | | | | + Kernel Mode v-------+ ^-------+ ^--------+ +-----+ | + | | | | | | + IRQi| iret| IRQj| iret| IRQj| | + v------+ v-----+ ^-----+ v-----+ + | | + IRQk | iret| + v-----+ + +Interrupt context +----------------- + +While an interrupt is handled (from the time the CPU jumps to the interrupt +handler until the interrupt handler returns - e.g. IRET is issued) it is said +that code runs in "interrupt context". 
+ +Code that runs in interrupt context has the following properties: + +.. slide:: Interrupt context + :inline-contents: True + :level: 2 + + * it runs as a result of an IRQ (not of an exception) + * there is no well defined process context associated + * not allowed to trigger a context switch (no sleep, schedule, or user memory access) + +Deferrable actions +------------------ + +Deferrable actions are used to run callback functions at a later time. If +deferrable actions are scheduled from an interrupt handler, the associated callback +function will run after the interrupt handler has completed. + +There are two large categories of deferrable actions: those that run in +interrupt context and those that run in process context. + +The purpose of interrupt context deferrable actions is to avoid doing too much +work in the interrupt handler function. Running for too long with interrupts +disabled can have undesired effects such as increased latency or poor system +performance due to missing other interrupts (e.g. dropping network packets +because the CPU did not react in time to dequeue packets from the network +interface and the network card buffer is full). + +In Linux there are three types of deferrable actions: + +.. slide:: Deferrable actions in Linux + :inline-contents: True + :level: 2 + + + * softIRQ + + * runs in interrupt context + * statically allocated + * same handler may run in parallel on multiple cores + + * tasklet + + * runs in interrupt context + * can be dynamically allocated + * same handler runs are serialized + + * workqueues + + * run in process context + +Deferrable actions have APIs to: **initialize** an instance, **activate** or +**schedule** the action and **mask/disable** and **unmask/enable** the execution +of the callback function. The latter is used for synchronization purposes between +the callback function and other contexts. 
+ +Soft IRQs +--------- + +Soft IRQs is the term used for the low level mechanism that implements deferring +work from interrupt handlers but that still runs in interrupt context. + +.. slide:: Soft IRQs + :inline-contents: True + :level: 2 + + Soft IRQ APIs: + + * initialize: :c:func:`open_softirq` + * activation: :c:func:`raise_softirq` + * masking: :c:func:`local_bh_disable`, :c:func:`local_bh_enable` + + Once activated, the callback function :c:func:`do_softirq` runs either: + + * after an interrupt handler or + * from the ksoftirqd kernel thread + + +.. slide:: ksoftirqd + :inline-contents: False + :level: 2 + + * minimum priority kernel thread + * runs softirqs after certain limits are reached + * tries to achieve good latency and avoid process starvation + + +Since softirqs can reschedule themselves or other interrupts can occur that +reschedule them, they can potentially lead to (temporary) process starvation if +checks are not put into place. Currently, the Linux kernel does not allow +running soft irqs for more than :c:macro:`MAX_SOFTIRQ_TIME` or rescheduling for +more than :c:macro:`MAX_SOFTIRQ_RESTART` consecutive times. + +Once these limits are reached a special kernel thread, **ksoftirqd**, is woken up +and all of the rest of pending soft irqs will be run from the context of this +kernel thread. + +Soft irqs usage is restricted, they are used by a handful of subsystems that have +low latency requirements. For 4.19 this is the full list of soft irqs: + +.. slide:: Types of soft IRQ + :inline-contents: True + :level: 2 + + * HI_SOFTIRQ + * TIMER_SOFTIRQ + * NET_TX_SOFTIRQ + * NET_RX_SOFTIRQ + * BLOCK_SOFTIRQ + * IRQ_POLL_SOFTIRQ + * TASKLET_SOFTIRQ + * SCHED_SOFTIRQ + * HRTIMER_SOFTIRQ + * RCU_SOFTIRQ + +Tasklets +-------- + +.. slide:: Tasklets + :inline-contents: True + :level: 2 + + Tasklets are a dynamic type (not limited to a fixed number) of + deferred work running in interrupt context.
+ + Tasklets API: + + * initialization: :c:func:`tasklet_init` + * activation: :c:func:`tasklet_schedule` + * masking: :c:func:`tasklet_disable`, :c:func:`tasklet_enable` + + Tasklets are implemented on top of two dedicated softirqs: + :c:macro:`TASKLET_SOFTIRQ` and :c:macro:`HI_SOFTIRQ` + + Tasklets are also serialized, i.e. the same tasklet can only execute on one processor at a time. + + +Workqueues +---------- + + .. slide:: Workqueues + :inline-contents: True + :level: 2 + + Workqueues are a type of deferred work that runs in process context. + + They are implemented on top of kernel threads. + + Workqueues API: + + * init: :c:macro:`INIT_WORK` + * activation: :c:func:`schedule_work` + +Timers +------ + +.. slide:: Timers + :inline-contents: True + :level: 2 + + Timers are implemented on top of the :c:macro:`TIMER_SOFTIRQ` + + Timer API: + + * initialization: :c:func:`setup_timer` + * activation: :c:func:`mod_timer` + + + + + + + + + + + + + + + + + + + + diff --git a/Documentation/teaching/lectures/intro.rst b/Documentation/teaching/lectures/intro.rst new file mode 100644 index 00000000000000..7d336d39ffd17a --- /dev/null +++ b/Documentation/teaching/lectures/intro.rst @@ -0,0 +1,1166 @@ +============ +Introduction +============ + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +.. slide:: Introduction + :inline-contents: True + :level: 2 + + * Basic operating systems terms and concepts + + * Overview of the Linux kernel + + +Basic operating systems terms and concepts +========================================== + +User vs Kernel +-------------- + +.. slide:: User vs Kernel + :level: 2 + + * Execution modes + + * Kernel mode + + * User mode + + * Memory protection + + * Kernel-space + + * User-space + + +Kernel and user are two terms that are often used in operating +systems.
Their definition is pretty straightforward: The kernel is +the part of the operating system that runs with higher privileges +while user (space) usually means applications running with low +privileges. + +However these terms are heavily overloaded and might have very +specific meanings in some contexts. + +User mode and kernel mode are terms that may refer specifically to the +processor execution mode. Code that runs in kernel mode can fully +[#hypervisor]_ control the CPU while code that runs in user mode has +certain limitations. For example, local CPU interrupts can only be +disabled or enabled while running in kernel mode. If such an operation +is attempted while running in user mode an exception will be generated +and the kernel will take over to handle it. + +.. [#hypervisor] some processors may have even higher privileges than + kernel mode, e.g. a hypervisor mode, that is only + accessible to code running in a hypervisor (virtual + machine monitor) + +User space and kernel space may refer specifically to memory +protection or to virtual address spaces associated with either the +kernel or user applications. + +Grossly simplifying, the kernel space is the memory area that is +reserved to the kernel while user space is the memory area reserved to +a particular user process. The kernel space is access protected so +that user applications can not access it directly, while user space +can be directly accessed from code running in kernel mode. + + +Typical operating system architecture +------------------------------------- + +In the typical operating system architecture (see the figure below) +the operating system kernel is responsible for accessing and sharing the +hardware in a secure and fair manner with multiple applications. + +.. slide:: Typical operating system architecture + :level: 2 + :inline-contents: True + + .. ditaa:: + + +---------------+ +--------------+ +---------------+ -\ + | Application 1 | | Application2 | ...
| Application n | | + +---------------+ +--------------+ +---------------+ |> User space + | | | | + v v v -/ + +--------------------------------------------------------+ -\ + | System Call Interface | | + +--------------------------------------------------------+ | + | | | | + v v v |> Kernel space + +--------------------------------------------------------+ | + | Kernel | | + +--------------------------------------------------------+ | + | Device drivers | | + +--------------------------------------------------------+ -/ + | | | -\ + v v v |> Hardware + -/ + + + +The kernel offers a set of APIs that applications issue which are +generally referred to as "System Calls". These APIs are different from +regular library APIs because they are the boundary at which the +execution mode switches from user mode to kernel mode. + +In order to provide application compatibility, system calls are rarely +changed. Linux particularly enforces this (as opposed to in-kernel +APIs that can change as needed). + +The kernel code itself can be logically separated into core kernel +code and device drivers code. Device drivers code is responsible for +accessing particular devices while the core kernel code is +generic. The core kernel can be further divided into multiple logical +subsystems (e.g. file access, networking, process management, etc.) + + +Monolithic kernel +----------------- + +A monolithic kernel is one where there is no access protection between +the various kernel subsystems and where public functions can be +directly called between various subsystems. + + +.. slide:: Monolithic kernel + :level: 2 + :inline-contents: True + + ..
ditaa:: + + +-----+ +-----+ +-----+ + | App | | App | | App | + +-----+ +-----+ +-----+ + | | | User + =--|-------=--------|--------=-------|-------------------=- + | | | Kernel + v v v + +--------------------------------------------------------+ + | System Call Interface | + +--------------------------------------------------------+ + | | + v v + +-----+ +-----+ + | |<---------------------------->| | Kernel + | |<---+ +------->| | functions + +--+--+ | | +-----+ + | | | ^ + | | +-----+ | | + |+------+---->| |<---+ | + || | +-----+ | + || | | + vv | v + +--++-+ | +-----+ + | | +------------------------>| | Device + | |<---------------------------->| | Drivers + +--+--+ +--+--+ + | | + v v + +--------------------------------------------------------+ + | Hardware | + +--------------------------------------------------------+ + + +However, most monolithic kernels do enforce a logical separation +between subsystems especially between the core kernel and device +drivers with relatively strict APIs (but not necessarily fixed in +stone) that must be used to access services offered by one subsystem +or device drivers. This, of course, depends on the particular kernel +implementation and the kernel's architecture. + + +Micro kernel +------------ + +A micro-kernel is one where large parts of the kernel are protected +from each-other, usually running as services in user space. Because +significant parts of the kernel are now running in user mode, the +remaining code that runs in kernel mode is significantly smaller, hence +micro-kernel term. + +.. slide:: Micro-kernel + :level: 2 + :inline-contents: True + + .. 
ditaa:: + + +-----+ +--------+ +---------+ +---------+ + | App | | File | | Network | | Display |<--+ + | | | Server | | Server | | Server |-+ | + +-----+ +--------+ +---------+ +---------+ | | + | ^ | | User + -|-|----------------------------------------=-|-|-------=- + | | | | Kernel + | | | | + | | | | + | | | | + | | Reply +----------------------------+ | | + | +--------| |----+ | + +--------->| Micro kernel |------+ + Request | (IPC, Memory, Scheduler) | + | | + +----------------------------+ + | + v + +--------------------------------------------------------+ + | Hardware | + +--------------------------------------------------------+ + + +In a micro-kernel architecture the kernel contains just enough code +that allows for message passing between different running +processes. Practically that means implementing the scheduler and an IPC +mechanism in the kernel, as well as basic memory management to set up +the protection between applications and services. + +One of the advantages of this architecture is that the services are +isolated and hence bugs in one service won't impact other services. + +As such, if a service crashes we can just restart it without affecting +the whole system. However, in practice this is difficult to achieve +since restarting a service may affect all applications that depend on +that service (e.g. if the file server crashes all applications with +opened file descriptors would encounter errors when accessing them). + +This architecture imposes a modular approach to the kernel and offers +memory protection between services but at a cost of performance. What +is a simple function call between two services on monolithic kernels +now requires going through IPC and scheduling which will incur a +performance penalty [#minix-vs-linux]_. + +..
[#minix-vs-linux] https://lwn.net/Articles/220255/ + + +Micro-kernels vs monolithic kernels +----------------------------------- + +Advocates of micro-kernels often suggest that micro-kernels are +superior because of the modular design a micro-kernel +enforces. However, monolithic kernels can also be modular and there +are several approaches that modern monolithic kernels use toward this +goal: + +.. slide:: Monolithic kernels *can* be modular + :level: 2 + :inline-contents: True + + * Components can be enabled or disabled at compile time + + * Support of loadable kernel modules (at runtime) + + * Organize the kernel in logical, independent subsystems + + * Strict interfaces but with low performance overhead: macros, + inline functions, function pointers + + +There is a class of operating systems that (used to) claim to be +hybrid kernels, in between monolithic and micro-kernels (e.g. Windows, +Mac OS X). However, since all of the typical monolithic services run +in kernel-mode in these operating systems, there is little merit to +qualify them other than monolithic kernels. + +.. slide:: "Hybrid" kernels + :level: 2 + :inline-contents: True + + Many operating systems and kernel experts have dismissed the label + as meaningless, and just marketing. Linus Torvalds said of this + issue: + + "As to the whole 'hybrid kernel' thing - it's just marketing. It's + 'oh, those microkernels had good PR, how can we try to get good PR + for our working kernel? Oh, I know, let's use a cool name and try + to imply that it has all the PR advantages that that other system + has'." + + +Address space +------------- + +.. slide:: Address space + :level: 2 + + * Physical address space + + * RAM and peripheral memory + + * Virtual address space + + * How the CPU sees the memory (when in protected / paging mode) + + * Process address space + + * Kernel address space + + +The address space term is an overloaded term that can have different +meanings in different contexts.
+ +The physical address space refers to the way the RAM and device +memories are visible on the memory bus. For example, on 32bit Intel +architecture, it is common to have the RAM mapped into the lower +physical address space while the graphics card memory is mapped high +in the physical address space. + +The virtual address space (or sometimes just address space) refers to +the way the CPU sees the memory when the virtual memory module is +activated (sometimes called protected mode or paging enabled). The +kernel is responsible for setting up a mapping that creates a virtual +address space in which areas of this space are mapped to certain +physical memory areas. + +Related to the virtual address space there are two other terms that +are often used: process (address) space and kernel (address) space. + +The process space is (part of) the virtual address space associated +with a process. It is the "memory view" of processes. It is a +continuous area that starts at zero. Where the process's address space +ends depends on the implementation and architecture. + +The kernel space is the "memory view" of the code that runs in kernel +mode. + + +User and kernel sharing the virtual address space +------------------------------------------------- + +A typical implementation for user and kernel spaces is one where the +virtual address space is shared between user processes and the kernel. + +In this case kernel space is located at the top of the address space, +while user space at the bottom. In order to prevent the user processes +from accessing kernel space, the kernel creates mappings that prevent +access to the kernel space from user mode. + +.. slide:: User and kernel sharing the virtual address space + :level: 2 + :inline-contents: True + + ..
ditaa:: + + +-------------------+ ^ + 0xFFFFFFFF | | | + | | | Kernel space + | | | + +-------------------+ v + 0xC0000000 | | ^ + | | | User space + | | | + | | | + | | | + | | | + | | | + | | | + | | | + 0x00000000 +-------------------+ v + + 32bit Virtual Address Space + +Execution contexts +------------------ + +.. slide:: Execution contexts + :level: 2 + + * Process context + + * Code that runs in user mode, part of a process + + * Code that runs in kernel mode, as a result of a system call + issued by a process + + * Interrupt context + + * Code that runs as a result of an interrupt + + * Always runs in kernel mode + + +One of the most important jobs of the kernel is to service interrupts +and to service them efficiently. This is so important that a special +execution context is associated with it. + +The kernel executes in interrupt context when it runs as a result of +an interrupt. This includes the interrupt handler, but it is not +limited to it, there are other special (software) constructs that run +in interrupt mode. + +Code running in interrupt context always runs in kernel mode and there +are certain limitations that the kernel programmer has to be aware of +(e.g. not calling blocking functions or accessing user space). + +Opposed to interrupt context there is process context. Code that runs +in process context can do so in user mode (executing application code) +or in kernel mode (executing a system call). + + +Multi-tasking +------------- + +.. slide:: Multi-tasking + :level: 2 + + * An OS that supports the "simultaneous" execution of multiple processes + + * Implemented by fast switching between running processes to allow + the user to interact with each program + + * Implementation: + + * Cooperative + + * Preemptive + +Multitasking is the ability of the operating system to +"simultaneously" execute multiple programs. It does so by quickly +switching between running processes. 
+ +Cooperative multitasking requires the programs to cooperate to achieve +multitasking. A program will run and relinquish CPU control back +to the OS, which will then schedule another program. + +With preemptive multitasking the kernel will enforce strict limits for +each process, so that all processes have a fair chance of +running. Each process is allowed to run a time slice (e.g. 100ms) +after which, if it is still running, it is forcefully preempted and +another task is scheduled. + +Preemptive kernel +----------------- + +.. slide:: Preemptive kernel + :level: 2 + :inline-contents: True + + Preemptive multitasking and preemptive kernels are different terms. + + A kernel is preemptive if a process can be preempted while running + in kernel mode. + + However, note that non-preemptive kernels may support preemptive + multitasking. + + +Pageable kernel memory +---------------------- + +.. slide:: Pageable kernel memory + :level: 2 + :inline-contents: True + + A kernel supports pageable kernel memory if parts of kernel memory + (code, data, stack or dynamically allocated memory) can be swapped + to disk. + +Kernel stack +------------ + +.. slide:: Kernel stack + :level: 2 + :inline-contents: True + + Each process has a kernel stack that is used to maintain the + function call chain and local variables state while it is executing + in kernel mode, as a result of a system call. + + The kernel stack is small (4KB - 12 KB) so the kernel developer has + to avoid allocating large structures on stack or recursive calls + that are not properly bounded. + +Portability +----------- + +In order to increase portability across various architectures and +hardware configurations, modern kernels are organized as follows at the +top level: + +.. 
slide:: Portability + :level: 2 + :inline-contents: True + + * Architecture and machine specific code (C & ASM) + + * Architecture independent code (C): + + * kernel core (further split in multiple subsystems) + + * device drivers + +This makes it easier to reuse code as much as possible between +different architectures and machine configurations. + + +Asymmetric MultiProcessing (ASMP) +--------------------------------- + +Asymmetric MultiProcessing (ASMP) is a way of supporting multiple +processors (cores) by a kernel, where a processor is dedicated to the +kernel and all other processors run user space programs. + +The disadvantage of this approach is that the kernel throughput +(e.g. system calls, interrupt handling, etc.) does not scale with the +number of processors. Since typical processes frequently use system +calls, the scalability of the approach is limited to very specific +systems (e.g. scientific applications). + + +.. slide:: Asymmetric MultiProcessing (ASMP) + :level: 2 + :inline-contents: True + + .. ditaa:: + + +-----------+ + | | + +------------------>| Memory |<-----------------+ + | | | | + | +-----------+ | + | ^ | + | | | + v v v + +--------------+ +---------------+ +---------------+ + | | | | | | + | Processor A | | Processor B | | Processor C | + | | | | | | + | | | +-----------+ | | +-----------+ | + | | | | Process 1 | | | | Process 1 | | + | | | +-----------+ | | +-----------+ | + | | | | | | + | +----------+ | | +-----------+ | | +-----------+ | + | | kernel | | | | Process 2 | | | | Process 2 | | + | +----------+ | | +-----------+ | | +-----------+ | + | | | | | | + | | | +-----------+ | | +-----------+ | + | | | | Process 3 | | | | Process 3 | | + | | | +-----------+ | | +-----------+ | + +--------------+ +---------------+ +---------------+ + + +Symmetric MultiProcessing (SMP) +------------------------------- + +As opposed to ASMP, in SMP mode the kernel can run on any of the +existing processors, just as user processes.
This approach is more +difficult to implement, because it creates race conditions in the +kernel if two processes run kernel functions that access the same +memory locations. + +In order to support SMP the kernel must implement synchronization +primitives (e.g. spin locks) to guarantee that only one processor is +executing a critical section. + +.. slide:: Symmetric MultiProcessing (SMP) + :level: 2 + :inline-contents: True + + .. ditaa:: + + +-----------+ + | | + +------------------->| Memory |<------------------+ + | | | | + | +-----------+ | + | ^ | + | | | + v v v + +---------------+ +---------------+ +---------------+ + | | | | | | + | Processor A | | Processor B | | Processor C | + | | | | | | + | +-----------+ | | +-----------+ | | +-----------+ | + | | Process 1 | | | | Process 1 | | | | Process 1 | | + | +-----------+ | | +-----------+ | | +-----------+ | + | | | | | | + | +-----------+ | | +-----------+ | | +-----------+ | + | | Process 2 | | | | Process 2 | | | | Process 2 | | + | +-----------+ | | +-----------+ | | +-----------+ | + | | | | | | + | +-----------+ | | +-----------+ | | +-----------+ | + | | kernel | | | | kernel | | | | kernel | | + | +-----------+ | | +-----------+ | | +-----------+ | + +---------------+ +---------------+ +---------------+ + + +CPU Scalability +--------------- + +CPU scalability refers to how well the performance scales with +the number of cores. There are a few things that the kernel developer +should keep in mind with regard to CPU scalability: + +.. slide:: CPU Scalability + :level: 2 + :inline-contents: True + + * Use lock free algorithms when possible + + * Use fine grained locking for high contention areas + + * Pay attention to algorithm complexity + + +Overview of the Linux kernel +============================ + + +Linux development model +----------------------- + +.. 
slide:: Linux development model + :level: 2 + + * Open source, GPLv2 License + + * Contributors: companies, academia and independent developers + + * Development cycle: 3 – 4 months which consists of a 1 - 2 week + merge window followed by bug fixing + + * Features are only allowed in the merge window + + * After the merge window a release candidate is done on a weekly + basis (rc1, rc2, etc.) + +The Linux kernel is one of the largest open source projects in the world +with thousands of developers contributing code and millions of lines of +code changed for each release. + +It is distributed under the GPLv2 license, which, simply put, +requires that any modification of the kernel done on software that is +shipped to customers should be made available to them (the customers), +although in practice most companies make the source code publicly +available. + +There are many companies (often competing) that contribute code to the +Linux kernel as well as people from academia and independent +developers. + +The current development model is based on doing releases at fixed +intervals of time (usually 3 - 4 months). New features are merged into +the kernel during a one or two week merge window. After the merge +window, a release candidate is done on a weekly basis (rc1, rc2, etc.) + + +Maintainer hierarchy +-------------------- + +In order to scale the development process, Linux uses a hierarchical +maintainership model: + +..
slide:: Maintainer hierarchy + :level: 2 + :inline-contents: True + + * Linus Torvalds is the maintainer of the Linux kernel and merges pull + requests from subsystem maintainers + + * Each subsystem has one or more maintainers that accept patches or + pull requests from developers or device driver maintainers + + * Each maintainer has its own git tree, e.g.: + + * Linus Torvalds: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + + * David Miller (networking): git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/ + + * Each subsystem may maintain a -next tree where developers can submit + patches for the next merge window + +Since the merge window is only a maximum of two weeks, most of the +maintainers have a -next tree where they accept new features from +developers or maintainers downstream even when the merge window +is closed. + +Note that bug fixes are accepted even outside the merge window in the +maintainer's tree from where they are periodically pulled by the +upstream maintainer, for every release candidate. + + + +Linux source code layout +------------------------- + +.. slide:: Linux source code layout + :level: 2 + :inline-contents: True + + ..
ditaa:: + + +-------+ + | linux | + +-+-----+ + | + +------+--------+---------+---------+--------------+--------------+ + | | | | | | | + | v v v v v v + | +------+ +-------+ +-------+ +--------+ +---------------+ +---------+ + | | arch | | block | | certs | | crypto | | Documentation | | drivers | + | +------+ +-------+ +-------+ +--------+ +---------------+ +---------+ + | + +-------+----------+--------+---------+--------+--------+---------+ + | | | | | | | | + | v v v v v v v + | +----------+ +----+ +---------+ +------+ +-----+ +--------+ +-----+ + | | firmware | | fs | | include | | init | | ipc | | kernel | | lib | + | +----------+ +----+ +---------+ +------+ +-----+ +--------+ +-----+ + | + +-----+------+---------+------------+------------+------------+ + | | | | | | | + | v v v v v v + | +----+ +-----+ +---------+ +---------+ +----------+ +-------+ + | | mm | | net | | samples | | scripts | | security | | sound | + | +----+ +-----+ +---------+ +---------+ +----------+ +-------+ + | + +------+--------+--------+ + | | | + v v v + +-------+ +-----+ +------+ + | tools | | usr | | virt | + +-------+ +-----+ +------+ + + +These are the top level of the Linux source code folders: + +* arch - contains architecture specific code; each architecture is + implemented in a specific sub-folder (e.g. 
arm, arm64, x86) + +* block - contains the block subsystem code that deals with reading + and writing data from block devices: creating block I/O requests, + scheduling them (there are several I/O schedulers available), + merging requests, and passing them down through the I/O stack to the + block device drivers + +* certs - implements support for signature checking using certificates + +* crypto - software implementation of various cryptography algorithms + as well as a framework that allows offloading such algorithms in + hardware + +* Documentation - documentation for various subsystems, Linux kernel + command line options, description for sysfs files and format, device + tree bindings (supported device tree nodes and format) + +* drivers - drivers for various devices as well as the Linux driver + model implementation (an abstraction that describes drivers, devices, + buses and the way they are connected) + +* firmware - binary or hex firmware files that are used by various + device drivers + +* fs - home of the Virtual Filesystem Switch (generic filesystem code) + and of various filesystem drivers + +* include - header files + +* init - the generic (as opposed to architecture specific) + initialization code that runs during boot + +* ipc - implementation for various Inter Process Communication system + calls such as message queues, semaphores, shared memory + +* kernel - process management code (including support for kernel + threads, workqueues), scheduler, tracing, time management, generic + irq code, locking + +* lib - various generic functions such as sorting, checksums, + compression and decompression, bitmap manipulation, etc. + +* mm - memory management code, for both physical and virtual memory, + including the page, SL*B and CMA allocators, swapping, virtual memory + mapping, process address space manipulation, etc.
+ +* net - implementation for various network stacks including IPv4 and + IPv6; BSD socket implementation, routing, filtering, packet + scheduling, bridging, etc. + +* samples - various driver samples + +* scripts - parts of the build system, scripts used for building modules, + kconfig the Linux kernel configurator, as well as various other + scripts (e.g. checkpatch.pl that checks if a patch conforms to + the Linux kernel coding style) + +* security - home of the Linux Security Module framework that allows + extending the default (Unix) security model as well as + implementation for multiple such extensions such as SELinux, smack, + apparmor, tomoyo, etc. + +* sound - home of ALSA (Advanced Linux Sound Architecture) as well as the + old Linux sound framework (OSS) + +* tools - various user space tools for testing or interacting with + Linux kernel subsystems + +* usr - support for embedding an initrd file in the kernel image + +* virt - home of the KVM (Kernel-based Virtual Machine) hypervisor + + +Linux kernel architecture +------------------------- + +.. slide:: Linux kernel architecture + :level: 2 + :inline-contents: True + + .. ditaa:: + :height: 100% + + +---------------+ +--------------+ +---------------+ + | Application 1 | | Application2 | ...
| Application n | + +---------------+ +--------------+ +---------------+ + | | | + v v v + +--------------------------------------------------------+ + | Kernel | + | | + | +----------------------+ +-------------------+ | + | | Process Management | | Memory Management | | + | +----------------------+ +-------------------+ | + | | + | +------------+ +------------+ +------------+ | + | | Block I/O | | VFS | | Networking | | + | +------------+ +------------+ +------------+ | + | | + | +------------+ +------------+ +------------+ | + | | IPC | | Security | | Crypto | | + | +------------+ +------------+ +------------+ | + | | + | +------------+ +------------+ +------------+ | + | | DRM | | ALSA | | USB | | + | +------------+ +------------+ +------------+ | + | ... | + +--------------------------------------+-----------------+ + | Device drivers | arch | + | | | + | +----+ +-----+ +--------+ +----+ | +----------+ | + | |char| |block| |ethernet| |wifi| | | machine 1| | + | +----+ +-----+ +--------+ +----+ | +----------+ | + | +----------+ +-----+ +----+ +---+ | +----------+ | + | |filesystem| |input| |iio | |usb| | | machine 2| | + | +----------+ +-----+ +----+ +---+ | +----------+ | + | +-----------+ +----------+ +---+ | | + | |framebuffer| | platform | |drm| | ... | + | +-----------+ +----------+ +---+ | | + +-------------------------+----+-------+-----------------+ + | | | + v v v + + +--------------------------------------------------------+ + | Hardware | + +--------------------------------------------------------+ + + +arch +.... + +.. 
slide:: arch + :level: 2 + :inline-contents: True + + * Architecture specific code + + * May be further sub-divided in machine specific code + + * Interfacing with the boot loader and architecture specific + initialization + + * Access to various hardware bits that are architecture or machine + specific such as interrupt controller, SMP controllers, BUS + controllers, exceptions and interrupt setup, virtual memory handling + + * Architecture optimized functions (e.g. memcpy, string operations, + etc.) + +This part of the Linux kernel contains architecture specific code and +may be further sub-divided in machine specific code for certain +architectures (e.g. arm). + +"Linux was first developed for 32-bit x86-based PCs (386 or +higher). These days it also runs on (at least) the Compaq Alpha AXP, +Sun SPARC and UltraSPARC, Motorola 68000, PowerPC, PowerPC64, ARM, +Hitachi SuperH, IBM S/390, MIPS, HP PA-RISC, Intel IA-64, DEC VAX, AMD +x86-64 and CRIS architectures." + +It implements access to various hardware bits that are architecture or +machine specific such as interrupt controller, SMP controllers, BUS +controllers, exceptions and interrupt setup, virtual memory handling. + +It also implements architecture optimized functions (e.g. memcpy, +string operations, etc.) + + +Device drivers +.............. + +.. slide:: Device drivers + :level: 2 + + * Unified device model + + * Each subsystem has its own specific driver interfaces + + * Many device driver types (TTY, serial, SCSI, filesystem, ethernet, + USB, framebuffer, input, sound, etc.) + +The Linux kernel uses a unified device model whose purpose is to +maintain internal data structures that reflect the state and structure +of the system. Such information includes what devices are present, +what is their status, what bus they are attached to, to what driver +they are attached, etc. This information is essential for implementing +system wide power management, as well as device discovery and dynamic +device removal.
+ +Each subsystem has its own specific driver interface that is tailored +to the devices it represents in order to make it easier to write +correct drivers and to reduce code duplication. + +Linux supports one of the most diverse sets of device driver types; +some examples are: TTY, serial, SCSI, filesystem, ethernet, USB, +framebuffer, input, sound, etc. + + +Process management +.................. + +.. slide:: Process management + :level: 2 + + * Unix basic process management and POSIX threads support + + * Processes and threads are abstracted as tasks + + * Operating system level virtualization + + * Namespaces + + * Control groups + +Linux implements the standard Unix process management APIs such as +fork(), exec(), wait(), as well as standard POSIX threads. + +However, Linux processes and threads are implemented quite +differently than in other kernels. There are no internal structures +implementing processes or threads, instead there is a :c:type:`struct +task_struct` that describes an abstract scheduling unit called task. + +A task has pointers to resources, such as address space, file +descriptors, IPC ids, etc. The resource pointers for tasks that are +part of the same process point to the same resources, while resources +of tasks of different processes will point to different resources. + +This peculiarity, together with the `clone()` and `unshare()` system +calls, allows for implementing new features such as namespaces. + +Namespaces are used together with control groups (cgroup) to implement +operating system virtualization in Linux. + +cgroup is a mechanism to organize processes hierarchically and +distribute system resources along the hierarchy in a controlled and +configurable manner. + + +Memory management +................. + +Linux memory management is a complex subsystem that deals with: + +.. 
slide:: Memory management + :level: 2 + :inline-contents: True + + * Management of the physical memory: allocating and freeing memory + + * Management of the virtual memory: paging, swapping, demand + paging, copy on write + + * User services: user address space management (e.g. mmap(), brk(), + shared memory) + + * Kernel services: SL*B allocators, vmalloc + + + +Block I/O management +.................... + +The Linux Block I/O subsystem deals with reading and writing data from +or to block devices: creating block I/O requests, transforming block I/O +requests (e.g. for software RAID or LVM), merging and sorting the +requests and scheduling them via various I/O schedulers to the block +device drivers. + +.. slide:: Block I/O management + :level: 2 + :inline-contents: True + + .. ditaa:: + :height: 100% + + +---------------------------------+ + | Virtual Filesystem Switch | + +---------------------------------+ + ^ + | + v + +---------------------------------+ + | Device Mapper | + +---------------------------------+ + ^ + | + v + +---------------------------------+ + | Generic Block Layer | + +---------------------------------+ + ^ + | + v + +--------------------------------+ + | I/O scheduler | + +--------------------------------+ + ^ ^ + | | + v v + +--------------+ +--------------+ + | Block device | | Block device | + | driver | | driver | + +--------------+ +--------------+ + + +Virtual Filesystem Switch +......................... + +The Linux Virtual Filesystem Switch implements common / generic +filesystem code to reduce duplication in filesystem drivers. It +introduces certain filesystem abstractions such as: + +* inode - describes the file on disk (attributes, location of data + blocks on disk) + +* dentry - links an inode to a name + +* file - describes the properties of an opened file (e.g. file + pointer) + +* superblock - describes the properties of a formatted filesystem + (e.g. 
number of blocks, block size, location of root directory on + disk, encryption, etc.) + +.. slide:: Virtual Filesystem Switch + :level: 2 + :inline-contents: True + + .. ditaa:: + :height: 100% + + + ^ ^ ^ + | stat | open | read + v v v + +------------------------------------------------------------+ + | Virtual Filesystem Switch | + | | + | | + | /-------\ /--------\ /--------\ | + | | inode |<----------+ dentry |<----------+ FILE | | + | \---+---/ \----+---/ \---+----/ | + | | | | | + | | | | | + | v v v | + | +-------+ +--------+ +-------+ | + | | inode | | dentry | | page | | + | | cache | | cache | | cache | | + | +-------+ +--------+ +-------+ | + | | + +------------------------------------------------------------+ + ^ ^ + | | + v v + +-------------+ +-------------+ + | Filesystem | | Filesystem | + | driver | | driver | + +-------------+ +-------------+ + + +The Linux VFS also implements a complex caching mechanism which +includes the following: + +* the inode cache - caches the file attributes and internal file + metadata + +* the dentry cache - caches the directory hierarchy of a filesystem + +* the page cache - caches file data blocks in memory + + + +Networking stack +................ + +.. slide:: Networking stack + :level: 2 + :inline-contents: True + + .. 
ditaa:: + :height: 100% + + +---------------------------+ + | Berkeley Socket Interface | + +---------------------------+ + + +---------------------------+ + | Transport layer | + +-------------+-------------+ + | TCP | UDP | + +-------------+-------------+ + + +---------------------------+ + | Network layer | + +-----+---------+-----------+ + | IP | Routing | NetFilter | + +-----+---------+-----------+ + + +---------------------------+ + | Data link layer | + +-------+-------+-----------+ + | ETH | ARP | BRIDGING | + +-------+-------+-----------+ + + +---------------------------+ + | Queuing discipline | + +---------------------------+ + + +---------------------------+ + | Network device drivers | + +---------------------------+ + +Linux Security Modules +...................... + +.. slide:: Linux Security Modules + :level: 2 + :inline-contents: True + + * Hooks to extend the default Linux security model + + * Used by several Linux security extensions: + + * Security-Enhanced Linux + + * AppArmor + + * Tomoyo + + * Smack diff --git a/Documentation/teaching/lectures/smp.rst b/Documentation/teaching/lectures/smp.rst new file mode 100644 index 00000000000000..8e16ac48a87c53 --- /dev/null +++ b/Documentation/teaching/lectures/smp.rst @@ -0,0 +1,1184 @@ +========================== +Symmetric Multi-Processing +========================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +.. 
slide:: Symmetric Multi-Processing + :inline-contents: True + :level: 2 + + * Kernel Concurrency + + * Atomic operations + + * Spin locks + + * Cache thrashing + + * Optimized spin locks + + * Process and Interrupt Context Synchronization + + * Mutexes + + * Per CPU data + + * Memory Ordering and Barriers + + * Read-Copy Update + + +Synchronization basics +====================== + +Because the Linux kernel supports symmetric multi-processing (SMP) it +must use a set of synchronization mechanisms to achieve predictable +results, free of race conditions. + +.. note:: We will use the terms core, CPU and processor as + interchangeable for the purpose of this lecture. + +Race conditions can occur when the following two conditions happen +simultaneously: + +.. slide:: Race conditions + :inline-contents: True + :level: 2 + + * there are at least two execution contexts that run in "parallel": + + * truly run in parallel (e.g. two system calls running on + different processors) + + * one of the contexts can arbitrarily preempt the other (e.g. an + interrupt preempts a system call) + + * the execution contexts perform read-write accesses to shared + memory + + +Race conditions can lead to erroneous results that are hard to debug, +because they manifest only when the execution contexts are scheduled +on the CPU cores in a very specific order. + +A classical race condition example is an incorrect implementation for +a release operation of a resource counter: + +.. slide:: Race condition: resource counter release + :inline-contents: True + :level: 2 + + .. code-block:: c + + void release_resource() + { + counter--; + + if (!counter) + free_resource(); + } + + +A resource counter is used to keep a shared resource available until +the last user releases it but the above implementation has a race +condition that can cause freeing the resource twice: + + +.. slide:: Race condition scenario + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + counter is 2 + + Thread A Thread B + + * + | + | + +---------------------+ + | dec counter | counter is 1 + | cEEE | + +---------------------+ + | + | B preempts A + +-----------------------------------------------+ + | + v + +----------------------+ + counter is 0 | dec counter | + | if (!counter) | + resource is freed | free_resource(); | + | cEEE | + +----------------------+ + B finishes, A continues | + +-----------------------------------------------+ + | + v + +----------------------+ + | if (!counter) | + | free_resource(); | resource is freed + | cEEE | + +----------------------+ + +In most cases the `release_resource()` function will only free the +resource once. However, in the scenario above, if thread A is +preempted right after decrementing `counter` and thread B calls +`release_resource()` it will cause the resource to be freed. When +resumed, thread A will also free the resource since the counter value +is 0. + +To avoid race conditions the programmer must first identify the +critical section that can generate a race condition. The critical +section is the part of the code that reads and writes shared memory +from multiple parallel contexts. + +In the example above, the minimal critical section is starting with +the counter decrement and ending with checking the counter's value. + +Once the critical section has been identified race conditions can be +avoided by using one of the following approaches: + +.. slide:: Avoiding race conditions + :inline-contents: True + :level: 2 + + * make the critical section **atomic** (e.g. use atomic + instructions) + + * **disable preemption** during the critical section (e.g. disable + interrupts, bottom-half handlers, or thread preemption) + + * **serialize the access** to the critical section (e.g. 
use spin + locks or mutexes to allow only one context or thread in the + critical section) + + + +Linux kernel concurrency sources +================================ + +There are multiple sources of concurrency in the Linux kernel that +depend on the kernel configuration as well as the type of system it +runs on: + + +.. slide:: Linux kernel concurrency sources + :inline-contents: True + :level: 2 + + * **single core systems**, **non-preemptive kernel**: the current + process can be preempted by interrupts + + * **single core systems**, **preemptive kernel**: above + the + current process can be preempted by other processes + + * **multi-core systems**: above + the current process can run + in parallel with another process or with an interrupt running on + another processor + +.. note:: We only discuss kernel concurrency and that is why a + non-preemptive kernel running on a single core system + has interrupts as the only source of concurrency. + + +Atomic operations +================= + +In certain circumstances we can avoid race conditions by using atomic +operations that are provided by hardware. Linux provides a unified API +to access atomic operations: + +.. slide:: Atomic operations + :inline-contents: True + :level: 2 + + * integer based: + + * simple: :c:func:`atomic_inc`, :c:func:`atomic_dec`, + :c:func:`atomic_add`, :c:func:`atomic_sub` + + * conditional: :c:func:`atomic_dec_and_test`, :c:func:`atomic_sub_and_test` + + * bit based: + + * simple: :c:func:`test_bit`, :c:func:`set_bit`, + :c:func:`change_bit` + + * conditional: :c:func:`test_and_set_bit`, :c:func:`test_and_clear_bit`, + :c:func:`test_and_change_bit` + +For example, we could use :c:func:`atomic_dec_and_test` to implement +the resource counter decrement and value checking atomically: + +.. slide:: Using :c:func:`atomic_dec_and_test` to implement resource counter release + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + void release_resource() + { + if (atomic_dec_and_test(&counter)) + free_resource(); + } + + +One complication with atomic operations is encountered in +multi-core systems, where an atomic operation is no longer +atomic at the system level (but still atomic at the core level). + +To understand why, we need to decompose the atomic operation into memory +loads and stores. Then we can construct race condition scenarios where +the load and store operations are interleaved across CPUs, like in the +example below where incrementing a value from two processors will +produce an unexpected result: + +.. slide:: Atomic operations may not be atomic on SMP systems + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + + +------------+ + | Memory | + +-------------+ LOAD (0) | | +-------------+ + | CPU 0 |<--------------| v <- 0 | LOAD (0) | CPU 1 | + | | STORE (1) | |-------------->| | + | inc v |-------------->| v <- 1 | STORE (1) | inc v | + | cEEE | | v <- 1 |<--------------| cEEE | + +-------------+ | cEEE | +-------------+ + +------------+ + + +In order to provide atomic operations on SMP systems different +architectures use different techniques. For example, on x86 a LOCK +prefix is used to lock the system bus while executing the prefixed +operation: + +.. slide:: Fixing atomic operations for SMP systems (x86) + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + +------------+ + +-------------+ BUS LOCK | Memory | + | CPU 0 |<------------->| | + | | LOAD (0) | | + | inc v |<--------------| v <- 0 | + | | STORE (1) | | + | |-------------->| v <- 1 | + | | BUS UNLOCK | | + | cEEE |<------------->| | BUS LOCK +-------------+ + +-------------+ | |<------------->| CPU 1 | + | | LOAD (1) | | + | |<--------------| inc v | + | v <- 2 | STORE (2) | | + | |-------------->| | + | | BUS UNLOCK | | + | cEEE |<------------->| cEEE | + +------------+ +-------------+ + + +On ARM the LDREX and STREX instructions are used together to guarantee +atomic access: LDREX loads a value and signals the exclusive monitor +that an atomic operation is in progress. The STREX attempts to store a +new value but only succeeds if the exclusive monitor has not detected +other exclusive operations. So, to implement atomic operations the +programmer must retry the operation (both LDREX and STREX) until the +exclusive monitor signals a success. + +Although they are often interpreted as "light" or "efficient" +synchronization mechanisms (because they "don't require spinning or +context switches", or because they "are implemented in hardware so +they must be more efficient", or because they "are just instructions +so they must have similar efficiency as other instructions"), as seen +from the implementation details, atomic operations are actually +expensive. + + +Disabling preemption (interrupts) +================================= + +On single core systems and non preemptive kernels the only source of +concurrency is the preemption of the current thread by an +interrupt. To prevent concurrency it is thus sufficient to disable +interrupts. + +This is done with architecture specific instructions, but Linux offers +architecture independent APIs to disable and enable interrupts: + +.. slide:: Synchronization with interrupts (x86) + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + #define local_irq_disable() \ + asm volatile ("cli" : : : "memory") + + #define local_irq_enable() \ + asm volatile ("sti" : : : "memory") + + #define local_irq_save(flags) \ + asm volatile ("pushf ; pop %0" :"=g" (flags) + : /* no input */: "memory") \ + asm volatile("cli": : :"memory") + + #define local_irq_restore(flags) \ + asm volatile ("push %0 ; popf" + : /* no output */ + : "g" (flags) :"memory", "cc"); + + +Although the interrupts can be explicitly disabled and enabled with +:c:func:`local_irq_disable` and :c:func:`local_irq_enable` these APIs +should only be used when the current state of interrupts is +known. They are usually used in core kernel code (like interrupt +handling). + +For typical cases where we want to avoid interrupts due to concurrency +issues it is recommended to use the :c:func:`local_irq_save` and +:c:func:`local_irq_restore` variants. They take care of saving and +restoring the interrupt state so they can be freely called from +overlapping critical sections without the risk of accidentally +enabling interrupts while still in a critical section, as long as the +calls are balanced. + +Spin Locks +========== + +Spin locks are used to serialize access to a critical section. They +are necessary on multi-core systems where we can have true execution +parallelism. This is a typical spin lock implementation: + + +.. slide:: Spin Lock Implementation Example (x86) + :inline-contents: True + :level: 2 + + .. code-block:: asm + + spin_lock: + lock bts [my_lock], 0 + jc spin_lock + + /* critical section */ + + spin_unlock: + mov [my_lock], 0 + + **bts dts, src** - bit test and set; it copies the src bit from the dts + memory address to the carry flag and then sets it: + + .. code-block:: c + + CF <- dts[src] + dts[src] <- 1 + + +As can be seen, the spin lock uses an atomic instruction to make +sure that only one core can enter the critical section. 
If there are +multiple cores trying to enter they will continuously "spin" until the +lock is released. + +While the spin lock avoids race conditions, it can have a significant +impact on the system's performance due to "lock contention": + + +.. slide:: Lock Contention + :inline-contents: True + :level: 2 + + * There is lock contention when at least one core spins trying to + enter the critical section lock + + * Lock contention grows with the critical section size, time spent + in the critical section and the number of cores in the system + + +Another negative side effect of spin locks is cache thrashing. + +.. slide:: Cache Thrashing + :inline-contents: True + :level: 2 + + Cache thrashing occurs when multiple cores are trying to read and + write to the same memory resulting in excessive cache misses. + + Since spin locks continuously access memory during lock contention, + cache thrashing is a common occurrence due to the way cache + coherency is implemented. + + +Cache coherency in multi-processor systems +========================================== + +The memory hierarchy in multi-processor systems is composed of local +CPU caches (L1 caches), shared CPU caches (L2 caches) and the main +memory. To explain cache coherency we will ignore the L2 cache and +only consider the L1 caches and main memory. + +In the figure below we present a view of the memory hierarchy with two +variables A and B that fall into different cache lines and where +caches and the main memory are synchronized: + +.. slide:: Synchronized caches and memory + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + +-------+ +-------+ + | CPU 0 | | CPU 1 | + +-------+ +-------+ + cache cache + +-------+ +-------+ + A | 1 | | 1 | A + +-------+ +-------+ + B | 2 | | 2 | B + +-------+ +-------+ + memory + +-----------------------------+ + A | 1 | + +-----------------------------+ + B | 2 | + +-----------------------------+ + + +In the absence of a synchronization mechanism between the caches and +main memory, when CPU 0 executes `A = A + B` and CPU 1 executes `B = +A + B` we will have the following memory view: + +.. slide:: Unsynchronized caches and memory + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + +-------+ +-------+ + | CPU 0 | | CPU 1 | + +-------+ +-------+ + A <- A + B B <- A + B + + +-------+ +-------+ + A | 3 | | 1 | A + +-------+ +-------+ + B | 2 | | 3 | B + +-------+ +-------+ + write back caches + +-----------------------------+ + A | 1 | + +-----------------------------+ + B | 2 | + +-----------------------------+ + + +In order to avoid the situation above multi-processor systems use +cache coherency protocols. There are two main types of cache coherency +protocols: + +.. slide:: Cache Coherency Protocols + :inline-contents: True + :level: 2 + + * Bus snooping (sniffing) based: memory bus transactions are + monitored by caches and they take actions to preserve + coherency + + * Directory based: there is a separate entity (directory) that + maintains the state of caches; caches interact with the directory + to preserve coherency + + Bus snooping is simpler but it performs poorly when the number of + cores goes beyond 32-64. + + Directory based cache coherence protocols scale much better (up + to thousands of cores) and are usually used in NUMA systems. + + +A simple cache coherency protocol that is commonly used in practice is +MESI (named after the acronym of the cache line states names: +**Modified**, **Exclusive**, **Shared** and **Invalid**). Its main +characteristics are: + +.. 
slide:: MESI Cache Coherence Protocol + :inline-contents: True + :level: 2 + + * Caching policy: write back + + * Cache line states + + * Modified: owned by a single core and dirty + + * Exclusive: owned by a single core and clean + + * Shared: shared between multiple cores and clean + + * Invalid: the line is not cached + +Issuing read or write requests from CPU cores will trigger state +transitions, as exemplified below: + +.. slide:: MESI State Transitions + :inline-contents: True + :level: 2 + + * Invalid -> Exclusive: read request, all other cores have the line + in Invalid; line loaded from memory + + * Invalid -> Shared: read request, at least one core has the line + in Shared or Exclusive; line loaded from sibling cache + + * Invalid/Shared/Exclusive -> Modified: write request; **all + other** cores **invalidate** the line + + * Modified -> Invalid: write request from other core; line is + flushed to memory + + +.. note:: The most important characteristic of the MESI protocol is + that it is a write-invalidate cache protocol. When writing to a + shared location all other caches are invalidated. + +This has an important performance impact in certain access patterns, and +one such pattern is contention for a simple spin lock implementation +like we discussed above. + +To exemplify this issue let's consider a system with three CPU cores, +where the first has acquired the spin lock and it is running the +critical section while the other two are spinning waiting to enter the +critical section: + +.. slide:: Cache thrashing due to spin lock contention + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + +-------+ +-------+ +-------+ + | CPU 0 |<---------------+ | CPU 1 | Invalidate | CPU 2 | + | cache |<-------------+ | | cache |<---+ +---------->| cache | + +-------+ Invalidate | | +-------+ | | +-------+ + | | | | + | | +----------------------------+ + spin_lock(&lock); | | | | + | | READ lock | | + | +---- WRITE lock ---+ | + | | + | READ lock | + +-------------------------------- WRITE lock ----+ + + ... ... ... + READ data READ lock READ lock + | | | + | | | + | | | + +------------------------------+-------------------------+ + | + v + + cache miss + +As can be seen from the figure above, due to the writes issued by +the cores spinning on the lock we see frequent cache line invalidate +operations which means that basically the two waiting cores will flush +and load the cache line while waiting for the lock, creating +unnecessary traffic on the memory bus and slowing down memory accesses +for the first core. + +Another issue is that most likely data accessed by the first CPU +during the critical section is stored in the same cache line as the +lock (common optimization to have the data ready in the cache after +the lock is acquired). This means that the cache invalidation +triggered by the two other spinning cores will slow down the execution +of the critical section which in turn triggers more cache invalidate +actions. + +Optimized spin locks +==================== + +As we have seen simple spin lock implementations can have +performance issues due to cache thrashing, especially as the number of +cores increases. To avoid this issue there are two possible strategies: + +* reduce the number of writes and thus reduce the number of cache + invalidate operations + +* avoid the other processors spinning on the same cache line, and thus + avoid the cache invalidate operations + + +An optimized spin lock implementation that uses the first approach is +presented below: + +.. 
slide:: Optimized spin lock (KeAcquireSpinLock) + :inline-contents: True + :level: 2 + + |_| + + .. code-block:: asm + + spin_lock: + rep ; nop + test lock_addr, 1 + jnz spin_lock + lock bts lock_addr + jc spin_lock + + + * we first test the lock read only, using a non atomic + instruction, to avoid writes and thus invalidate operations + while we spin + + * only when the lock *might* be free, we try to acquire it + +The implementation also uses the **PAUSE** instruction to avoid +pipeline flushes due to (false positive) memory order violations and +to add a small delay (proportional to the memory bus frequency) to +reduce power consumption. + +A similar implementation with support for fairness (the CPU cores are +allowed in the critical section based on the time of arrival) is used +in the Linux kernel (the `ticket spin lock `_) +for many architectures. + +However, for the x86 architecture, the current spin lock +implementation uses a queued spin lock where the CPU cores spin on +different locks (hopefully distributed in different cache lines) to +avoid cache invalidation operations: + +.. slide:: Queued Spin Locks + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + +-------------------------------------------+ + | Queued Spin Lock cEEE | + | | + | +---+ +---+ +---+ +---+ | + | | |----->| |----->| |----->| | | + | +---+ +---+ +---+ +---+ | + | ^ ^ ^ ^ | + | | | | | | + +-------------------------------------------+ + | | | | + CPU10 CPU17 CPU99 CPU0 + owns the spins on spins on spins on + lock private private private + lock lock lock + + + +Conceptually, when a new CPU core tries to acquire the lock and it +fails it will add its private lock to the list of waiting CPU +cores. When the lock owner exits the critical section it unlocks the +next lock in the list, if any. 
+ +While a read spin optimized spin lock reduces most of the cache +invalidation operations, the lock owner can still generate cache +invalidate operations due to writes to data structures close to the +lock and thus part of the same cache line. This in turn generates +memory traffic on subsequent reads on the spinning cores. + +Hence, queued spin locks scale much better for a large number of cores +as is the case for NUMA systems. And since they have similar fairness +properties as the ticket lock they are the preferred implementation on +the x86 architecture. + + +Process and Interrupt Context Synchronization +============================================= + +Accessing shared data from both process and interrupt context is a +relatively common scenario. On single core systems we can do this by +disabling interrupts, but that won't work on multi-core systems, +as we can have the process running on one CPU core and the interrupt +context running on a different CPU core. + +Using a spin lock, which was designed for multi-processor systems, +seems like the right solution, but doing so can cause common +deadlock conditions, as detailed by the following scenario: + + +.. slide:: Process and Interrupt Handler Synchronization Deadlock + :inline-contents: True + :level: 2 + + * In the process context we take the spin lock + + * An interrupt occurs and it is scheduled on the same CPU core + + * The interrupt handler runs and tries to take the spin lock + + * The current CPU will deadlock + + +To avoid this issue a two fold approach is used: + + +.. 
slide:: Interrupt Synchronization for SMP + :inline-contents: True + :level: 2 + + * In process context: disable interrupts and acquire a spin lock; + this will protect against both interrupt and other CPU core race + conditions (:c:func:`spin_lock_irqsave` and + :c:func:`spin_unlock_irqrestore` combine the two operations) + + * In interrupt context: take a spin lock; this will protect + against race conditions with other interrupt handlers or process + context running on different processors + + +We have the same issue for other interrupt context handlers such as +softirqs, tasklets or timers and while disabling interrupts might +work, it is recommended to use dedicated APIs: + +.. slide:: Bottom-Half Synchronization for SMP + :inline-contents: True + :level: 2 + + * In process context use :c:func:`spin_lock_bh` (which combines + :c:func:`local_bh_disable` and :c:func:`spin_lock`) and + :c:func:`spin_unlock_bh` (which combines :c:func:`spin_unlock` and + :c:func:`local_bh_enable`) + + * In bottom half context use: :c:func:`spin_lock` and + :c:func:`spin_unlock` (or :c:func:`spin_lock_irqsave` and + :c:func:`spin_unlock_irqrestore` if sharing data with interrupt + handlers) + + +As mentioned before, another source of concurrency in the Linux kernel +can be other processes, due to preemption. + +.. slide:: Preemption + :inline-contents: True + :level: 2 + + |_| + + Preemption is configurable: when active it provides better latency + and response time, while when deactivated it provides better + throughput. + + Preemption is disabled by spin locks and mutexes but it can be + manually disabled as well (by core kernel code). + + +As for local interrupt enabling and disabling APIs, the bottom half +and preemption APIs allow them to be used in overlapping critical +sections. A counter is used to track the state of bottom half and +preemption. In fact the same counter is used, with different increment +values: + +.. 
slide:: Preemption and Bottom-Half Masking + :inline-contents: True + :level: 2 + + .. code-block:: c + + #define PREEMPT_BITS 8 + #define SOFTIRQ_BITS 8 + #define HARDIRQ_BITS 4 + #define NMI_BITS 1 + + #define preempt_disable() preempt_count_inc() + + #define local_bh_disable() add_preempt_count(SOFTIRQ_OFFSET) + + #define local_bh_enable() sub_preempt_count(SOFTIRQ_OFFSET) + + #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK)) + + #define in_interrupt() irq_count() + + asmlinkage void do_softirq(void) + { + if (in_interrupt()) return; + ... + + +Mutexes +======= + +Mutexes are used to protect against race conditions from other CPU +cores but they can only be used in **process context**. As opposed to +spin locks, while a thread is waiting to enter the critical section it +will not use CPU time, but instead it will be added to a waiting queue +until the critical section is vacated. + +Since mutex and spin lock usage intersect, it is useful to compare +the two: + +.. slide:: Mutexes + :inline-contents: True + :level: 2 + + * They don't "waste" CPU cycles; system throughput is better than + spin locks if context switch overhead is lower than average + spinning time + + * They can't be used in interrupt context + + * They have a higher latency than spin locks + +Conceptually, the :c:func:`mutex_lock` operation is relatively simple: +if the mutex is not acquired we can take the fast path via an atomic +exchange operation: + + +.. slide:: :c:func:`mutex_lock` fast path + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + void __sched mutex_lock(struct mutex *lock) + { + might_sleep(); + + if (!__mutex_trylock_fast(lock)) + __mutex_lock_slowpath(lock); + } + + static __always_inline bool __mutex_trylock_fast(struct mutex *lock) + { + unsigned long curr = (unsigned long)current; + + if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr)) + return true; + + return false; + } + + +otherwise we take the slow path where we add ourselves to the mutex +waiting list and put ourselves to sleep: + +.. slide:: :c:func:`mutex_lock` slow path + :inline-contents: True + :level: 2 + + .. code-block:: c + + ... + spin_lock(&lock->wait_lock); + ... + /* add waiting tasks to the end of the waitqueue (FIFO): */ + list_add_tail(&waiter.list, &lock->wait_list); + ... + waiter.task = current; + ... + for (;;) { + if (__mutex_trylock(lock)) + goto acquired; + ... + spin_unlock(&lock->wait_lock); + ... + set_current_state(state); + spin_lock(&lock->wait_lock); + } + spin_lock(&lock->wait_lock); + acquired: + __set_current_state(TASK_RUNNING); + mutex_remove_waiter(lock, &waiter, current); + spin_unlock(&lock->wait_lock); + ... + +The full implementation is a bit more complex: instead of going to +sleep immediately it performs optimistic spinning if it detects that the lock +owner is currently running on a different CPU as chances are the owner +will release the lock soon. It also checks for signals and handles +mutex debugging for the locking dependency engine debug feature. + + +The :c:func:`mutex_unlock` operation is symmetric: if there are no +waiters on the mutex then we can take the fast path via an atomic exchange +operation: + +.. slide:: :c:func:`mutex_unlock` fast path + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + void __sched mutex_unlock(struct mutex *lock) + { + if (__mutex_unlock_fast(lock)) + return; + __mutex_unlock_slowpath(lock, _RET_IP_); + } + + static __always_inline bool __mutex_unlock_fast(struct mutex *lock) + { + unsigned long curr = (unsigned long)current; + + if (atomic_long_cmpxchg_release(&lock->owner, curr, 0UL) == curr) + return true; + + return false; + } + + void __mutex_lock_slowpath(struct mutex *lock) + { + ... + if (__mutex_waiter_is_first(lock, &waiter)) + __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); + ... + + +.. note:: Because :c:type:`struct task_struct` is cache aligned the 7 + lower bits of the owner field can be used for various flags, + such as :c:type:`MUTEX_FLAG_WAITERS`. + + +Otherwise we take the slow path where we pick up the first waiter from the +list and wake it up: + +.. slide:: :c:func:`mutex_unlock` slow path + :inline-contents: True + :level: 2 + + .. code-block:: c + + ... + spin_lock(&lock->wait_lock); + if (!list_empty(&lock->wait_list)) { + /* get the first entry from the wait-list: */ + struct mutex_waiter *waiter; + waiter = list_first_entry(&lock->wait_list, struct mutex_waiter, + list); + next = waiter->task; + wake_q_add(&wake_q, next); + } + ... + spin_unlock(&lock->wait_lock); + ... + wake_up_q(&wake_q); + + + +Per CPU data +============ + +Per CPU data avoids race conditions by avoiding the use of shared +data. Instead, an array sized to the maximum possible number of CPU cores is +used and each core will use its own array entry to read and write +data. This approach certainly has advantages: + + +.. slide:: Per CPU data + :inline-contents: True + :level: 2 + + * No need to synchronize to access the data + + * No contention, no performance impact + + * Well suited for distributed processing where aggregation is only + seldom necessary (e.g. statistics counters) + + +Memory Ordering and Barriers +============================ + +Modern processors and compilers employ out-of-order execution to +improve performance.
For example, processors can execute "future" +instructions while waiting for current instruction data to be fetched +from memory. + +Here is an example of out of order compiler generated code: + +.. slide:: Out of Order Compiler Generated Code + :inline-contents: True + :level: 2 + + +-------------------+-------------------------+ + | C code | Compiler generated code | + +-------------------+-------------------------+ + |.. code-block:: c |.. code-block:: asm | + | | | + | a = 1; | MOV R10, 1 | + | b = 2; | MOV R11, 2 | + | | STORE R11, b | + | | STORE R10, a | + +-------------------+-------------------------+ + + +.. note:: When executing instructions out of order the processor makes + sure that data dependency is observed, i.e. it won't execute + instructions whose input depends on the output of a previous + instruction that has not been executed. + +In most cases out of order execution is not an issue. However, in +certain situations (e.g. communicating via shared memory between +processors or between processors and hardware) we must issue some +instructions before others even without data dependency between them. + +For this purpose we can use barriers to order memory operations: + +.. slide:: Barriers + :inline-contents: True + :level: 2 + + * A read barrier (:c:func:`rmb()`, :c:func:`smp_rmb()`) is used to + make sure that no read operation crosses the barrier; that is, + all read operations before the barrier are complete before + executing the first instruction after the barrier + + * A write barrier (:c:func:`wmb()`, :c:func:`smp_wmb()`) is used to + make sure that no write operation crosses the barrier + + * A simple barrier (:c:func:`mb()`, :c:func:`smp_mb()`) is used + to make sure that no write or read operation crosses the barrier + + +Read Copy Update (RCU) +====================== + +Read Copy Update is a special synchronization mechanism similar to +read-write locks but with significant improvements over them (and some +limitations): + +.. 
slide:: Read Copy Update (RCU) + :level: 2 + :inline-contents: True + + * **Read-only** lock-less access at the same time with write access + + * Write accesses still require locks in order to avoid races + between writers + + * Requires unidirectional traversal by readers + + +In fact, the read-write locks in the Linux kernel have been deprecated +and then removed, in favor of RCU. + +Implementing RCU for a new data structure is difficult, but a few +common data structures (lists, queues, trees) do have RCU APIs that +can be used. + +RCU splits removal updates to the data structures in two phases: + +.. slide:: Removal and Reclamation + :inline-contents: True + :level: 2 + + * **Removal**: removes references to elements. Some old readers may + still see the old reference so we can't free the element. + + * **Reclamation**: free the element. This action is postponed until + all existing readers finish traversal (quiescent cycle). New + readers won't affect the quiescent cycle. + + +As an example, let's take a look at how to delete an element from a +list using RCU: + +.. slide:: RCU List Delete + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + (1) List Traversal (2) Removal + +-----------+ + +-----+ +-----+ +-----+ +-----+ | +-----+ | +-----+ + | | | | | | | | | | | | | | + | A |---->| B |---->| C | | A |--+ | B |--+->| C | + | | | | | | | | | | | | + +-----+ +-----+ +-----+ +-----+ +-----+ +-----+ + ^ ^ ^ ^ ^ ^ + | | | | | | + + + + + + + + (3) Quiescent cycle over (4) Reclamation + +-----------+ + +-----+ | +-----+ | +-----+ +-----+ +-----+ + | | | | | | | | | | | | + | A |--+ | B | +->| C | | A |---------------->| C | + | | | | | | | | | | + +-----+ +-----+ +-----+ +-----+ +-----+ + ^ ^ ^ ^ + | | | | + + +In the first step it can be seen that while readers traverse the list +all elements are referenced. In step two a writer removes +element B. Reclamation is postponed since there are still readers that +hold references to it.
In step three a quiescent cycle just expired +and it can be noticed that there are no more references to +element B. Other elements still have references from readers that +started the list traversal after the element was removed. In step four we +finally perform reclamation (free the element). + + +Now that we covered how RCU functions at the high level, let's look at +the APIs for traversing the list as well as adding an element to and +removing an element from the list: + + +.. slide:: RCU list APIs cheat sheet + :inline-contents: True + :level: 2 + + .. code-block:: c + + /* list traversal */ + rcu_read_lock(); + list_for_each_entry_rcu(i, head) { + /* no sleeping, blocking calls or context switch allowed */ + } + rcu_read_unlock(); + + + /* list element delete */ + spin_lock(&lock); + list_del_rcu(&node->list); + spin_unlock(&lock); + synchronize_rcu(); + kfree(node); + + /* list element add */ + spin_lock(&lock); + list_add_rcu(&node->list, head); + spin_unlock(&lock); + diff --git a/Documentation/teaching/lectures/so2.cs.pub.ro.rst b/Documentation/teaching/lectures/so2.cs.pub.ro.rst new file mode 100644 index 00000000000000..fb514430ad347d --- /dev/null +++ b/Documentation/teaching/lectures/so2.cs.pub.ro.rst @@ -0,0 +1,189 @@ +==================== +Sisteme de operare 2 +==================== + +`View slides `_ + +.. slideconf:: + :autoslides: True + :theme: single-level + + +The team +======== + +* Daniel Băluță (Daniel), Răzvan Deaconescu (Răzvan, RD), Claudiu + Ghioc (Claudiu), Valentin Ghiță (Vali), Sergiu Weisz (Sergiu), + Octavian Purdilă (Tavi) +* Iulian Mateșică (Iulian), Alexandra Militaru (Alex), Teodora + Șerbănescu (Teo), Ștefan Teodorescu (Ștefan, Fane), Mihai Popescu + (Mihai, Mișu) +* Mult succes în noul semestru! + +Where do we stand? +================== + +.. ditaa:: + + +---------------------------------------------------------+ + | application programming (EGC, SPG, PP, SPRC, IOC, etc.)
| + +---------------------------------------------------------+ + + +----------------------------------+ + | system programming (PC, SO, CPL) | + +----------------------------------+ + user space + ----------------------------------------------------------=- + kernel space + +--------------------------+ + | kernel programming (SO2) | + +--------------------------+ + + ----------------------------------------------------------=- + + +----------------------------------+ + | hardware (PM, CN1, CN2, PL ) | + +----------------------------------+ + +Resources +========= + +* wiki: http://ocw.cs.pub.ro/courses/so2 +* NeedToKnow: http://ocw.cs.pub.ro/courses/so2/need-to-know +* Linux Kernel Labs: https://linux-kernel-labs.github.io/ +* mailing list: so2@cursuri.cs.pub.ro +* Facebook +* vmchecker +* catalog Google, calendar Google +* LXR +* cs.curs.pub.ro - rol de portal +* karma awards + +Community +========= + +* contribuții via https://github.com/linux-kernel-labs/linux (PR sau + issues) +* corecții, ajustări, precizări, informații utile +* listă de discuții +* răspundeți la întrebările colegilor voștri +* propuneți subiecte de discuție care au legătură cu disciplina +* Facebook +* sugestii, propuneri, feedback +* Primiți puncte de karma + +Grading +======= + +* 2 puncte activitate la laborator +* 3 puncte „examen”, notare pe parcurs +* 10 puncte teme de casă +* Punctajul > 5 puncte e corelat direct proportional cu nota de la examen (la fel ca la SO) +* Tema 0 - 0,5 puncte +* Temele 1, 2, 3 - câte 1,5 puncte fiecare +* Activități “extra” +* Ixia challenge - 2 puncte +* Kernel (filesystem) hackaton - 2 puncte +* SO2 transport protocol - 1 punct +* Condiţii de promovare: nota finală 4.5, nota minimă examen 3 + +Obiectivele cursului +==================== + +* Prezentarea structurii interne a unui sistem de operare +* Target: sisteme de operare de uz general +* Structura și componentele unui kernel monolitic +* Procese, FS, Networking +* Memory management +* Exemplificare pe 
Linux + +Obiectivele laboratorului/temelor +================================= +* Însușirea cunoștințelor necesare implementării de device drivere +* Înțelegerea în profunzime a cunoștințelor prin rezolvarea de exerciții + +Cursuri necesare +================ + +* Programare: C +* SD: tabele de dispersie, arbori echilibrați +* IOCLA: lucrul cu registre și instrucțiuni de bază (adunări, comparaţii, salturi) +* CN: TLB/CAM, memorie, procesor, I/O +* PC, RL: ethernet, IP, sockeți +* SO: procese, fișiere, thread-uri, memorie virtuală + +Despre curs +=========== + +* 12 cursuri +* interactiv +* participaţi la discuţii +* întrebaţi atunci când nu aţi înţeles +* destul de “dens”, se recomandă călduros parcurgerea suportului bibliografic înainte şi după curs +* 1h:30 prezentare + 30min test si discutii pe marginea testului + +Despre curs (2) +=============== + +.. hlist:: + :columns: 2 + + * Introducere + * Procese + * Scheduling + * Apeluri de sistem + * Traps + * Spaţiul de adresă + * Memorie virtuală + * Memorie fizică + * Kernel debugging + * Block I/O + * Sisteme de fişiere + * SMP + * Networking + * Virtualizare + + +Despre laborator +================ + +* Kernel Modules and Device Drivers +* 15 min prezentare / 80 de minute lucru +* se punctează activitatea +* learn by doing + +Despre teme +=========== + +* Tema 0 +* Kprobe based tracer +* Driver pentru portul serial +* Software RAID +* Teme “extra” +* Filesystem driver - hackaton +* E100 driver - Ixia challenge +* Network transport protocol + + +Despre teme (2) +=============== + +* necesare: aprofundare API (laborator) și concepte (curs) +* teste publice +* suport de testare (vmchecker) +* relativ puţin cod de scris dar relativ dificile +* dificultatea constă în acomodarea cu noul mediu + +Bibliografie curs +================= + +* Linux Kernel Development, 3rd edition, Robert Love, Addison Wesley, 2010 +* Understanding the Linux Kernel, 3rd edition, Daniel P. 
Bovet & Marco Cesati, O'Reilly 2005 +* Linux Networking Architecture, Klaus Wehrle, Frank Pahlke, Hartmut Ritter, Daniel Muller, Marc Bechler, Prentice Hall 2004 +* Understanding Linux Network Internals, Christian Benvenuti, O'Reilly 2005 + +Bibliografie laborator +====================== + +* Linux Device Drivers, 3nd edition, Alessandro Rubini & Jonathan Corbet, O'Reilly 2006 +* Linux Kernel in a Nutshell, Greg Kroah-Hartman, O'Reilly 2005 diff --git a/Documentation/teaching/lectures/syscalls-inspection.cast b/Documentation/teaching/lectures/syscalls-inspection.cast new file mode 100644 index 00000000000000..ca749a423021e3 --- /dev/null +++ b/Documentation/teaching/lectures/syscalls-inspection.cast @@ -0,0 +1,1389 @@ +{"title": "System Call Inspection", "height": 24, "idle_time_limit": 1.0, "version": 2, "env": {"SHELL": "/bin/bash", "TERM": "xterm-256color"}, "width": 80, "timestamp": 1519682642} +[0.02593, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[2.585046, "o", "#"] +[2.812131, "o", " "] +[2.94729, "o", "a"] +[3.187178, "o", "t"] +[3.308689, "o", "t"] +[3.380836, "o", "a"] +[3.587609, "o", "c"] +[3.660319, "o", "h"] +[3.74021, "o", " "] +[3.935004, "o", "g"] +[4.157892, "o", "d"] +[4.34303, "o", "b"] +[4.527084, "o", " "] +[4.711204, "o", "t"] +[4.768411, "o", "o"] +[4.85479, "o", " "] +[5.081524, "o", "V"] +[5.193867, "o", "M"] +[5.366551, "o", "\r\n"] +[5.367316, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[6.562559, "o", "m"] +[6.655394, "o", "a"] +[6.702303, "o", "k"] +[6.826912, "o", "e"] +[6.898232, "o", " "] +[7.081019, "o", "g"] +[7.184305, "o", "d"] +[7.250501, "o", "b"] +[7.464891, "o", "\r\n"] +[7.487695, "o", "gdb -ex \"target remote localhost:1234\" /home/tavi/src/linux/vmlinux\r\n"] +[7.552276, "o", "GNU gdb (Ubuntu 7.11.1-0ubuntu1~16.5) 7.11.1\r\nCopyright 
(C) 2016 Free Software Foundation, Inc.\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law. Type \"show copying\"\r\nand \"show warranty\" for details.\r\nThis GDB was configured as \"x86_64-linux-gnu\".\r\nType \"show configuration\" for configuration details.\r\nFor bug reporting instructions, please see:\r\n.\r\nFind the GDB manual and other documentation resources online at:\r\n.\r\nFor help, type \"help\".\r\nType \"apropos word\" to search for commands related to \"word\"...\r\n"] +[7.552711, "o", "Reading symbols from /home/tavi/src/linux/vmlinux..."] +[8.0237, "o", "done.\r\n"] +[8.040804, "o", "Remote debugging using localhost:1234\r\n"] +[8.049686, "o", "default_idle () at arch/x86/kernel/process.c:357\r\n"] +[8.049841, "o", "357\t}\r\n"] +[8.049944, "o", "(gdb) "] +[8.617598, "o", "b"] +[8.712276, "o", "t"] +[8.906112, "o", "\r\n"] +[8.907612, "o", "#0 default_idle () at arch/x86/kernel/process.c:357\r\n#1 0xc101fcfd in arch_cpu_idle () at arch/x86/kernel/process.c:346\r\n"] +[8.916461, "o", "#2 0xc14639f9 in default_idle_call () at kernel/sched/idle.c:98\r\n"] +[8.916818, "o", "#3 0xc107b2a5 in cpuidle_idle_call () at kernel/sched/idle.c:156\r\n#4 do_idle () at kernel/sched/idle.c:246\r\n"] +[8.923466, "o", "#5 0xc107b5b5 in cpu_startup_entry (state=)\r\n at kernel/sched/idle.c:351\r\n#6 0xc145d643 in rest_init () at init/main.c:436\r\n"] +[8.923757, "o", "#7 0xc1614acb in start_kernel () at init/main.c:716\r\n"] +[8.926458, "o", "#8 0xc161424a in i386_start_kernel () at arch/x86/kernel/head32.c:56\r\n"] +[8.926871, "o", "#9 0xc10001d3 in startup_32_smp () at arch/x86/kernel/head_32.S:363\r\n"] +[8.927813, "o", "#10 0x00000000 in ?? 
()\r\n"] +[8.928515, "o", "(gdb) "] +[9.441926, "o", "#"] +[9.610515, "o", " "] +[10.164044, "o", "V"] +[10.268664, "o", "M"] +[10.453102, "o", " "] +[10.592012, "o", "i"] +[10.687218, "o", "s"] +[10.769038, "o", " "] +[10.88914, "o", "i"] +[11.031982, "o", "d"] +[11.171022, "o", "l"] +[11.339681, "o", "e"] +[11.541285, "o", "\r\n"] +[11.541408, "o", "(gdb) "] +[13.459643, "o", "#"] +[13.604705, "o", " "] +[13.787454, "o", "l"] +[13.867483, "o", "e"] +[14.028803, "o", "t"] +[14.196721, "o", "s"] +[14.299574, "o", " "] +[14.400587, "o", "a"] +[14.678709, "o", "t"] +[15.297907, "o", "\b\u001b[K"] +[15.474776, "o", "\b\u001b[K"] +[17.314512, "o", "a"] +[17.493688, "o", "d"] +[17.640039, "o", "d"] +[17.734576, "o", " "] +[17.836868, "o", "a"] +[17.94166, "o", " "] +[18.172955, "o", "b"] +[18.231003, "o", "r"] +[18.338597, "o", "e"] +[18.389443, "o", "a"] +[18.493356, "o", "k"] +[18.697004, "o", "p"] +[18.791969, "o", "o"] +[19.002137, "o", "i"] +[19.076693, "o", "n"] +[19.247079, "o", "t"] +[19.682963, "o", " "] +[19.920143, "o", "t"] +[20.019301, "o", "o"] +[20.13914, "o", " "] +[20.298365, "o", "a"] +[20.399756, "o", " "] +[20.543224, "o", "s"] +[20.654014, "o", "y"] +[20.722395, "o", "s"] +[20.875473, "o", "t"] +[20.949965, "o", "e"] +[21.03673, "o", "m"] +[21.105939, "o", " "] +[21.199805, "o", "c"] +[21.248673, "o", "a"] +[21.33164, "o", "l"] +[21.457598, "o", "l"] +[21.662139, "o", "\r\n"] +[21.662437, "o", "(gdb) "] +[23.558939, "o", "b"] +[23.610705, "o", "r"] +[23.671647, "o", "e"] +[23.830052, "o", "\u0007ak"] +[24.613391, "o", " "] +[25.766837, "o", "s"] +[25.878063, "o", "y"] +[25.959348, "o", "s"] +[26.26539, "o", "_"] +[26.884977, "o", "d"] +[26.936127, "o", "u"] +[27.021843, "o", "p"] +[27.318277, "o", "2"] +[27.598228, "o", "\r\n"] +[27.640182, "o", "Breakpoint 1 at 0xc1139210: file fs/file.c, line 912.\r\n"] +[27.64023, "o", "(gdb) "] +[28.770631, "o", "c"] +[29.000408, "o", "\r\nContinuing.\r\n"] +[29.585196, "o", "^Z"] +[29.585536, "o", "\r\n[1]+ 
Stopped make gdb\r\n"] +[29.586221, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[30.715625, "o", "#"] +[30.828185, "o", " "] +[30.978622, "o", "c"] +[31.038514, "o", "o"] +[31.18384, "o", "n"] +[31.28793, "o", "n"] +[31.34898, "o", "e"] +[31.392867, "o", "c"] +[31.690793, "o", " "] +[32.237691, "o", "\b\u001b[K"] +[32.355048, "o", "t"] +[32.442939, "o", " "] +[32.588287, "o", "t"] +[32.918417, "o", "o"] +[33.039158, "o", " "] +[33.167914, "o", "t"] +[33.2546, "o", "h"] +[33.340674, "o", "e"] +[33.395216, "o", " "] +[33.6407, "o", "V"] +[33.72697, "o", "M"] +[33.917668, "o", "\r\n"] +[33.918502, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[37.546829, "o", "m"] +[37.637743, "o", "i"] +[37.761726, "o", "n"] +[37.837263, "o", "i"] +[37.938906, "o", "c"] +[38.022622, "o", "o"] +[38.113482, "o", "m"] +[38.172694, "o", " "] +[38.308186, "o", "-"] +[38.54722, "o", "D"] +[38.6566, "o", " "] +[39.13904, "o", "s"] +[39.277557, "o", "e"] +[39.337429, "o", "r"] +[39.459585, "o", "ial.pts "] +[39.776685, "o", "\r\n"] +[39.780118, "o", "\u001b[!p\u001b[?3;4l\u001b[4l\u001b>\u001b[0m\u001b(B"] +[39.780371, "o", "\u001b[?1h\u001b=\u001b[H\u001b[2J"] +[39.781975, "o", "\u001b[?12l\u001b[?25h"] +[39.782204, "o", "\nWelcome to minicom 2.7\r\n\nOPTIONS: I18n \r\n"] +[39.782381, "o", "Compiled on Feb 7 2016, 13:37:27.\r\nPort serial.pts, 23:03:56\r\n\nPress CTRL-A Z for help on special keys\r\n\n"] +[40.619769, "o", "\n"] +[40.622796, "o", "root@qemux86:~# "] +[41.981219, "o", "#"] +[42.161306, "o", " "] +[42.413334, "o", "t"] +[42.58837, "o", "r"] +[42.674525, "o", "i"] +[43.334665, "o", "g"] +[43.464242, "o", "g"] +[43.537786, "o", "e"] +[43.650717, "o", "r"] +[43.838322, "o", " "] +[44.718033, "o", "d"] +[44.842141, "o", "u"] +[44.914998, "o", "p"] +[45.233999, "o", "2"] +[45.931475, "o", " "] 
+[46.078743, "o", "s"] +[46.175471, "o", "y"] +[46.248864, "o", "s"] +[46.892927, "o", "t"] +[46.987556, "o", "e"] +[47.185408, "o", "m"] +[47.28593, "o", " "] +[47.444084, "o", "c"] +[47.49419, "o", "a"] +[47.548442, "o", "l"] +[47.661648, "o", "l"] +[47.793085, "o", "\r\n"] +[47.79407, "o", "root@qemux86:~# "] +[48.389908, "o", "e"] +[48.469687, "o", "c"] +[48.566341, "o", "h"] +[48.637507, "o", "o"] +[48.761749, "o", " "] +[49.620761, "o", "a"] +[49.796805, "o", " "] +[50.159016, "o", ">"] +[50.287746, "o", " "] +[50.407382, "o", "/"] +[50.591362, "o", "t"] +[50.702265, "o", "m"] +[50.775619, "o", "p"] +[51.05656, "o", "/"] +[51.245617, "o", "x"] +[51.460523, "o", "\r\n"] +[52.650063, "o", "\u001b[0m\u001b(B\u001b[7m\u001b[24;1H\u001b[K\u001b[?12l\u001b[?25h"] +[52.650349, "o", "\u001b[?25lCTRL-A Z for help | 115200 8N1 | NOR | Minicom 2.7 | VT102 | Offline | rial.pts\u001b[?12l\u001b[?25h\u001b[13;1H"] +[52.886767, "o", "\u001b[24;1H\u001b[0m\u001b(B\u001b[?12l\u001b[?25h\u001b[H\u001b[2J\u001b[?1l\u001b>"] +[52.88713, "o", "Suspended. 
Type \"fg\" to resume.\r\n\r\n[2]+ Stopped minicom -D serial.pts\r\n"] +[52.887817, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[53.689978, "o", "f"] +[53.888551, "o", "g"] +[53.990684, "o", " "] +[54.462791, "o", "1"] +[54.54925, "o", "\r\n"] +[54.549574, "o", "make gdb\r\n"] +[54.550655, "o", "\r\n"] +[54.558163, "o", "Breakpoint 1, SyS_dup2 (oldfd=3, newfd=1) at fs/file.c:912\r\n"] +[54.558198, "o", "912\tSYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)\r\n"] +[54.558292, "o", "(gdb) "] +[68.972448, "o", "#"] +[69.215133, "o", " "] +[70.145897, "o", "l"] +[70.226727, "o", "e"] +[70.397696, "o", "t"] +[70.589752, "o", "s"] +[70.722316, "o", " "] +[71.095534, "o", "e"] +[71.198966, "o", "x"] +[71.299745, "o", "a"] +[71.960882, "o", "m"] +[72.431415, "o", "\b\u001b[K"] +[72.541534, "o", "\b\u001b[K"] +[72.657555, "o", "\b\u001b[K"] +[72.763229, "o", "\b\u001b[K"] +[72.955358, "o", "b"] +[72.995208, "o", "a"] +[73.09843, "o", "c"] +[73.230889, "o", "k"] +[73.48259, "o", "t"] +[73.625035, "o", "r"] +[73.716815, "o", "a"] +[73.931987, "o", "c"] +[74.00581, "o", "e"] +[74.151293, "o", " "] +[74.244418, "o", "t"] +[74.36868, "o", "h"] +[74.435091, "o", "e"] +[74.852494, "o", " "] +[74.998958, "o", "s"] +[75.146618, "o", "y"] +[75.18531, "o", "s"] +[75.40006, "o", "t"] +[75.507571, "o", "e"] +[75.878345, "o", "m"] +[76.021645, "o", " "] +[76.139759, "o", "c"] +[76.199716, "o", "a"] +[76.298113, "o", "l"] +[76.406879, "o", "l"] +[76.499901, "o", " "] +[76.624196, "o", "f"] +[76.714473, "o", "l"] +[76.863719, "o", "o"] +[76.936706, "o", "w"] +[77.285863, "o", "\r\n"] +[77.286167, "o", "(gdb) "] +[77.609834, "o", "b"] +[77.67291, "o", "t"] +[77.908684, "o", "\r\n"] +[77.909971, "o", "#0 SyS_dup2 (oldfd=3, newfd=1) at fs/file.c:912\r\n"] +[77.910076, "o", "#1 0xc1001361 in do_syscall_32_irqs_on (regs=)\r\n at arch/x86/entry/common.c:327\r\n"] +[77.912731, "o", "#2 
do_int80_syscall_32 (regs=0xc7235fb4) at arch/x86/entry/common.c:341\r\n#3 0xc14645d3 in entry_INT80_32 () at arch/x86/entry/entry_32.S:544\r\n"] +[77.91529, "o", "#4 0x00000003 in ?? ()\r\n"] +[77.917663, "o", "#5 0x00000003 in ?? ()\r\n"] +[77.92062, "o", "#6 0x0a09e224 in ?? ()\r\n"] +[77.920927, "o", "Backtrace stopped: previous frame inner to this frame (corrupt stack?)\r\n(gdb) "] +[82.733885, "o", "f"] +[82.805597, "o", "r"] +[82.943042, "o", " "] +[83.158856, "o", "1"] +[83.338543, "o", "\r\n"] +[83.338863, "o", "#1 0xc1001361 in do_syscall_32_irqs_on (regs=)\r\n at arch/x86/entry/common.c:327\r\n"] +[83.339111, "o", "327\t\t\tregs->ax = ia32_sys_call_table[nr](\r\n(gdb) "] +[84.302882, "o", "l"] +[84.466961, "o", "i"] +[84.573508, "o", "s"] +[84.805097, "o", "t"] +[86.197885, "o", " "] +[87.456892, "o", "\r\n"] +[87.457044, "o", "322\t\t\t * It's possible that a 32-bit syscall implementation\r\n323\t\t\t * takes a 64-bit parameter but nonetheless assumes that\r\n324\t\t\t * the high bits are zero. 
Make sure we zero-extend all\r\n325\t\t\t * of the args.\r\n326\t\t\t */\r\n327\t\t\tregs->ax = ia32_sys_call_table[nr]("] +[87.457116, "o", "\r\n328\t\t\t\t(unsigned int)regs->bx, (unsigned int)regs->cx,\r\n329\t\t\t\t(unsigned int)regs->dx, (unsigned int)regs->si,\r\n330\t\t\t\t(unsigned int)regs->di, (unsigned int)regs->bp);\r\n331\t\t}\r\n"] +[87.457622, "o", "(gdb) "] +[90.858059, "o", "#"] +[90.97271, "o", " "] +[91.144448, "o", "t"] +[91.217524, "o", "h"] +[91.269411, "o", "i"] +[91.456193, "o", "s"] +[91.63016, "o", " "] +[91.782525, "o", "l"] +[91.948946, "o", "o"] +[92.056737, "o", "o"] +[92.152544, "o", "k"] +[92.246961, "o", "s"] +[92.353698, "o", " "] +[92.444179, "o", "l"] +[92.631606, "o", "i"] +[92.770763, "o", "k"] +[92.861851, "o", "e"] +[92.969579, "o", " "] +[93.109459, "o", "t"] +[93.210131, "o", "h"] +[93.324771, "o", "e"] +[93.466555, "o", " "] +[93.619111, "o", "s"] +[93.715197, "o", "y"] +[93.795165, "o", "s"] +[93.949435, "o", "t"] +[94.027965, "o", "e"] +[94.184473, "o", " "] +[94.3008, "o", "c"] +[94.342823, "o", "a"] +[94.445488, "o", "l"] +[94.568218, "o", "l"] +[94.656948, "o", " "] +[94.839902, "o", "d"] +[94.957893, "o", "i"] +[95.075175, "o", "s"] +[95.230277, "o", "p"] +[95.339237, "o", "a"] +[95.543096, "o", "t"] +[95.751003, "o", "c"] +[95.854341, "o", "h"] +[95.943425, "o", "e"] +[96.023733, "o", "r"] +[96.172254, "o", "\r\n"] +[96.172374, "o", "(gdb) "] +[101.058686, "o", "#"] +[101.204849, "o", " "] +[101.545024, "o", "n"] +[101.645855, "o", "r"] +[101.798058, "o", " "] +[101.950607, "o", "i"] +[102.056996, "o", "s"] +[102.134187, "o", " "] +[102.342601, "o", "t"] +[102.416898, "o", "h"] +[102.533298, "o", "e"] +[102.593287, "o", " "] +[102.719888, "o", "s"] +[102.900423, "o", "y"] +[102.94586, "o", "s"] +[103.162384, "o", "t"] +[103.241973, "o", "e"] +[103.574398, "o", "m"] +[103.675272, "o", " "] +[103.802496, "o", "c"] +[103.849596, "o", "a"] +[103.955005, "o", "l"] +[104.090651, "o", "l"] +[104.1248, "o", " "] 
+[104.318767, "o", "n"] +[104.403953, "o", "u"] +[104.607057, "o", "m"] +[104.809716, "o", "b"] +[104.882333, "o", "e"] +[104.945321, "o", "r"] +[105.169779, "o", "\r\n"] +[105.170195, "o", "(gdb) "] +[120.867099, "o", "l"] +[121.049682, "o", "i"] +[121.155454, "o", "s"] +[121.359302, "o", "t"] +[121.479029, "o", " "] +[121.619056, "o", "3"] +[121.715251, "o", "0"] +[121.836342, "o", "0"] +[122.442593, "o", "\r\n"] +[122.447014, "o", "295\t/*\r\n296\t * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does\r\n297\t * all entry and exit work and returns with IRQs off. This function is\r\n298\t * extremely hot in workloads that use it, and it's usually called from\r\n299\t * do_fast_syscall_32, so forcibly inline it to improve performance.\r\n300\t */\r\n"] +[122.447378, "o", "301\tstatic __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)\r\n302\t{\r\n303\t\tstruct thread_info *ti = current_thread_info();\r\n304\t\tunsigned int nr = (unsigned int)regs->orig_ax;\r\n(gdb) "] +[124.092652, "o", "#"] +[124.554034, "o", " "] +[124.797533, "o", "i"] +[124.905822, "o", "t"] +[125.030417, "o", " "] +[125.160131, "o", "i"] +[125.259278, "o", "s"] +[125.37996, "o", " "] +[125.517546, "o", "p"] +[125.669394, "o", "i"] +[125.760003, "o", "c"] +[125.875741, "o", "k"] +[125.996377, "o", "e"] +[126.076136, "o", "d"] +[126.247928, "o", " "] +[126.466909, "o", "u"] +[126.53385, "o", "p"] +[126.664159, "o", " "] +[127.47466, "o", "f"] +[127.507676, "o", "r"] +[127.594243, "o", "o"] +[127.686369, "o", "m"] +[127.793519, "o", " "] +[127.867138, "o", "t"] +[127.990397, "o", "h"] +[128.119207, "o", " "] +[128.507492, "o", "\b\u001b[K"] +[128.590147, "o", "e"] +[128.692867, "o", " "] +[128.889331, "o", "s"] +[129.214909, "o", "a"] +[129.981084, "o", "m"] +[130.065029, "o", "e"] +[130.186849, "o", " "] +[130.540104, "o", "s"] +[130.732803, "o", "t"] +[130.793955, "o", "r"] +[131.422146, "o", "c"] +[131.696522, "o", "t"] +[131.853467, "o", "u"] +[132.034292, "o", 
"r"] +[132.08244, "o", "e"] +[132.324049, "o", "\b\u001b[K"] +[132.443772, "o", "\b\u001b[K"] +[132.573348, "o", "\b\u001b[K"] +[132.694863, "o", "\b\u001b[K"] +[132.81051, "o", "\b\u001b[K"] +[132.997241, "o", "u"] +[133.088819, "o", "c"] +[133.337918, "o", "t"] +[133.454541, "o", "u"] +[133.569818, "o", "r"] +[133.609841, "o", "e"] +[133.711729, "o", " "] +[134.021296, "o", "("] +[134.244295, "o", "p"] +[134.364747, "o", "t"] +[134.518463, "o", "_"] +[134.694346, "o", "r"] +[134.756482, "o", "e"] +[135.004436, "o", "g"] +[135.409283, "o", "s"] +[135.67484, "o", ")"] +[136.719996, "o", "\r\n"] +[136.720311, "o", "(gdb) "] +[159.587179, "o", "#"] +[159.767425, "o", " "] +[159.943861, "o", "l"] +[160.036363, "o", "e"] +[160.198006, "o", "t"] +[160.366941, "o", "s"] +[160.481441, "o", " "] +[160.690788, "o", "i"] +[161.019933, "o", "n"] +[161.265835, "o", "p"] +[161.435021, "o", "s"] +[161.821488, "o", "\b\u001b[K"] +[161.951302, "o", "\b\u001b[K"] +[162.226603, "o", "s"] +[162.36373, "o", "p"] +[162.498729, "o", "e"] +[162.574127, "o", "c"] +[162.821589, "o", "t"] +[162.945206, "o", " "] +[163.051945, "o", "t"] +[163.219025, "o", "h"] +[163.276168, "o", "e"] +[163.387915, "o", " "] +[163.856566, "o", "r"] +[163.947525, "o", "e"] +[164.54179, "o", "g"] +[164.735443, "o", "s"] +[164.860377, "o", " "] +[164.983243, "o", "c"] +[165.068139, "o", "o"] +[165.142719, "o", "n"] +[165.325774, "o", "t"] +[165.347382, "o", "e"] +[165.53939, "o", "n"] +[165.632312, "o", "t"] +[165.845555, "o", "s"] +[166.537455, "o", "\r\n"] +[166.537632, "o", "(gdb) "] +[166.819034, "o", "p"] +[166.958081, "o", "r"] +[167.052344, "o", "i"] +[167.133431, "o", "n"] +[167.213977, "o", "t"] +[167.322155, "o", " "] +[167.662143, "o", "*"] +[167.894034, "o", "r"] +[167.957716, "o", "e"] +[168.123891, "o", "g"] +[168.275607, "o", "s"] +[168.368704, "o", "\r\n"] +[168.369137, "o", "value has been optimized out\r\n(gdb) "] +[169.223841, "o", "#"] +[169.68196, "o", " "] +[170.473422, "o", "o"] 
+[170.562498, "o", "p"] +[170.754795, "o", "t"] +[170.780246, "o", "i"] +[170.985415, "o", "m"] +[171.06, "o", "i"] +[171.118702, "o", "z"] +[171.326175, "o", "e"] +[171.422983, "o", "d"] +[171.566281, "o", " "] +[171.82724, "o", "b"] +[171.902983, "o", "y"] +[172.056031, "o", " "] +[172.291054, "o", "c"] +[172.382563, "o", "i"] +[172.439382, "o", "m"] +[172.571587, "o", "p"] +[172.870082, "o", "\b\u001b[K"] +[172.987523, "o", "\b\u001b[K"] +[173.101322, "o", "\b\u001b[K"] +[173.254872, "o", "o"] +[173.322597, "o", "m"] +[173.44459, "o", "p"] +[173.603123, "o", "i"] +[173.779322, "o", "l"] +[173.905269, "o", "e"] +[174.000955, "o", "r"] +[174.086025, "o", "."] +[174.209714, "o", "."] +[174.349851, "o", "."] +[174.538669, "o", " "] +[179.50067, "o", "g"] +[179.580322, "o", "o"] +[179.763011, "o", " "] +[179.903828, "o", "a"] +[180.013494, "o", " "] +[180.236946, "o", "f"] +[180.467494, "o", "r"] +[180.568763, "o", "a"] +[180.697886, "o", "m"] +[180.82554, "o", "e"] +[180.907141, "o", " "] +[181.169606, "o", "d"] +[181.262241, "o", "e"] +[181.42098, "o", "e"] +[181.616856, "o", "p"] +[181.793458, "o", "e"] +[181.910212, "o", "r"] +[182.544419, "o", "\r\n"] +[182.54482, "o", "(gdb) "] +[183.073511, "o", "f"] +[183.168588, "o", "r"] +[183.669953, "o", " "] +[191.37133, "o", "2"] +[191.540642, "o", "\r\n"] +[191.541499, "o", "#2 do_int80_syscall_32 (regs=0xc7235fb4) at arch/x86/entry/common.c:341\r\n"] +[191.541856, "o", "341\t\tdo_syscall_32_irqs_on(regs);\r\n(gdb) "] +[192.917131, "o", "p"] +[193.062137, "o", "r"] +[193.147118, "o", "i"] +[193.231132, "o", "n"] +[193.304903, "o", "t"] +[193.385087, "o", " "] +[193.915946, "o", "*"] +[194.135248, "o", "r"] +[194.23597, "o", "e"] +[194.394568, "o", "g"] +[194.566658, "o", "s"] +[194.653019, "o", "\r\n"] +[194.653763, "o", "$1 = {bx = 3, cx = 1, "] +[194.654076, "o", "dx = 3, si = 168423920, di = 168419876, bp = 168419336, \r\n ax = 4294967258, ds = 123, __dsh = 0, es = 123, __esh = 0, fs = 0, \r\n __fsh = 0, "] 
+[194.654371, "o", "gs = 0, __gsh = 0, orig_ax = 63, ip = 1150252833, cs = 115, \r\n __csh = 0, flags = 514, "] +[194.65463, "o", "sp = 3218117628, ss = 123, __ssh = 0}\r\n(gdb) "] +[198.397833, "o", "#"] +[198.589958, "o", " "] +[199.416287, "o", "\b\u001b[K"] +[199.54844, "o", "\b\u001b[K"] +[199.789439, "o", "p"] +[199.910455, "o", "r"] +[200.013253, "o", "i"] +[200.081967, "o", "n"] +[200.172051, "o", "t"] +[200.23888, "o", " "] +[200.344988, "o", "r"] +[200.443063, "o", "e"] +[200.636653, "o", "g"] +[200.903926, "o", "s"] +[201.297424, "o", "\b\u001b[K"] +[201.555797, "o", "s"] +[201.70675, "o", "\r\n"] +[201.718736, "o", "$2 = (struct pt_regs *) 0xc7235fb4\r\n(gdb) "] +[202.762192, "o", "#"] +[202.96906, "o", " "] +[203.200172, "o", "t"] +[203.335914, "o", "h"] +[203.397287, "o", "i"] +[203.509368, "o", "s"] +[203.663251, "o", " "] +[204.186121, "o", "i"] +[204.277979, "o", "s"] +[204.37577, "o", " "] +[204.575364, "o", "a"] +[204.681527, "o", " "] +[205.432843, "o", "s"] +[205.577435, "o", "a"] +[205.84148, "o", "v"] +[205.901931, "o", "e"] +[206.142056, "o", " "] +[206.48041, "o", "\b\u001b[K"] +[206.576791, "o", "d"] +[206.650202, "o", " "] +[206.846523, "o", "o"] +[207.147206, "o", " "] +[207.497795, "o", "\b\u001b[K"] +[207.682557, "o", "n"] +[207.773982, "o", " "] +[207.873402, "o", "s"] +[208.081623, "o", "t"] +[208.158662, "o", "a"] +[208.370015, "o", "c"] +[208.494404, "o", "k"] +[210.422713, "o", " "] +[212.637105, "o", "s"] +[212.787177, "o", "t"] +[212.851629, "o", "r"] +[212.997138, "o", "u"] +[213.10568, "o", "c"] +[213.316927, "o", "t"] +[213.4249, "o", "u"] +[213.518001, "o", "r"] +[213.568409, "o", "e"] +[213.68215, "o", " "] +[214.147557, "o", "w"] +[214.236414, "o", "h"] +[214.292857, "o", "i"] +[214.384715, "o", "c"] +[214.448337, "o", "h"] +[214.52596, "o", " "] +[214.62955, "o", "s"] +[214.810601, "o", "t"] +[214.922436, "o", "o"] +[215.0038, "o", "r"] +[215.075964, "o", "e"] +[215.264322, "o", "s"] +[215.370649, "o", " "] +[215.800702, 
"o", "u"] +[215.868643, "o", "s"] +[215.977376, "o", "e"] +[216.044684, "o", "r"] +[216.169163, "o", "s"] +[216.284503, "o", "p"] +[216.34314, "o", "a"] +[216.450213, "o", "c"] +[216.550799, "o", "e"] +[216.722203, "o", " "] +[219.131447, "o", "r"] +[219.195644, "o", "e"] +[219.377234, "o", "g"] +[219.486582, "o", "i"] +[219.520092, "o", "s"] +[219.685879, "o", "t"] +[219.754728, "o", "e"] +[219.874287, "o", "r"] +[220.02845, "o", "s"] +[221.002656, "o", " "] +[221.144104, "o", "v"] +[221.201385, "o", "a"] +[221.300797, "o", "l"] +[221.517941, "o", "u \r"] +[221.655037, "o", "e"] +[221.819353, "o", "s"] +[222.291958, "o", "\r\n"] +[222.292213, "o", "(gdb) "] +[228.409682, "o", "i"] +[228.551388, "o", "n"] +[229.037434, "o", "f"] +[229.139332, "o", "o"] +[229.400686, "o", " "] +[230.375475, "o", "r"] +[230.455225, "o", "e"] +[230.615513, "o", "\u0007"] +[231.367524, "o", "g"] +[231.608532, "o", "s"] +[231.985991, "o", "\b\u001b[K"] +[232.17368, "o", "i"] +[232.244238, "o", "s"] +[232.453164, "o", "t"] +[232.55645, "o", "e"] +[232.688874, "o", "r"] +[232.95895, "o", " "] +[233.83154, "o", "e"] +[233.978035, "o", "s"] +[234.047244, "o", "p"] +[234.168937, "o", "\r\n"] +[234.169348, "o", "esp 0xc7235f8c"] +[234.169649, "o", "\t0xc7235f8c\r\n"] +[234.16976, "o", "(gdb) "] +[250.775201, "o", "#"] +[251.106842, "o", " "] +[251.346477, "o", "h"] +[251.418165, "o", "o"] +[251.501715, "o", "w"] +[251.664792, "o", " "] +[252.067964, "o", "d"] +[252.208144, "o", "i"] +[252.87959, "o", "d"] +[253.06756, "o", " "] +[253.393168, "o", "t"] +[253.533434, "o", "h"] +[253.601184, "o", "o"] +[253.76566, "o", "s"] +[253.959997, "o", "e"] +[254.157286, "o", " "] +[254.898526, "o", "u"] +[254.999925, "o", "s"] +[255.096884, "o", "e"] +[255.171001, "o", "r"] +[255.315978, "o", "s"] +[255.418223, "o", "p"] +[255.516375, "o", "a"] +[255.651774, "o", "c"] +[255.762584, "o", "e"] +[255.92294, "o", " "] +[256.104238, "o", "r"] +[256.165681, "o", "e"] +[256.623148, "o", "g"] +[256.761757, "o", 
"i"] +[256.823147, "o", "s"] +[257.031872, "o", "t"] +[257.103524, "o", "e"] +[257.561344, "o", "r"] +[258.279258, "o", " "] +[258.447006, "o", "v"] +[258.502906, "o", "a"] +[258.608216, "o", "l"] +[258.786735, "o", "u"] +[258.86697, "o", "e"] +[258.972944, "o", "s"] +[259.099576, "o", " "] +[259.319621, "o", "g"] +[259.43755, "o", "o"] +[259.566479, "o", "t"] +[259.670838, "o", " "] +[259.795132, "o", "s"] +[259.867977, "o", "a"] +[260.067483, "o", "v"] +[260.107313, "o", "e"] +[260.330451, "o", "d"] +[260.681612, "o", " "] +[262.446506, "o", "o"] +[262.637035, "o", "n"] +[262.71973, "o", " "] +[262.816535, "o", "s"] +[262.986307, "o", "t"] +[263.060635, "o", "a"] +[263.228155, "o", "c"] +[263.277514, "o", "k"] +[263.566323, "o", "?"] +[263.893074, "o", "\r\n"] +[263.893184, "o", "(gdb) "] +[264.675148, "o", "#"] +[265.174988, "o", " "] +[265.776336, "o", "g"] +[265.893763, "o", "o"] +[266.031976, "o", " "] +[266.147399, "o", "a"] +[266.278558, "o", " "] +[266.768307, "o", "f"] +[266.949187, "o", "r"] +[267.030231, "o", "a"] +[267.15945, "o", "m"] +[267.376536, "o", " "] +[267.847105, "o", "\b\u001b[K"] +[267.91339, "o", "e"] +[268.015642, "o", " "] +[268.317662, "o", "d"] +[268.428077, "o", "e"] +[268.589581, "o", "e"] +[268.699983, "o", "p"] +[268.795125, "o", "e"] +[268.886986, "o", "r"] +[268.983051, "o", "."] +[269.133065, "o", "."] +[269.275845, "o", "."] +[269.70994, "o", "\r\n"] +[269.71023, "o", "(gdb) "] +[269.989964, "o", "f"] +[270.057684, "o", "r"] +[270.124032, "o", "e"] +[270.338379, "o", " "] +[272.954739, "o", "3"] +[273.342331, "o", "\r\n"] +[273.342463, "o", "Undefined command: \"fre\". 
Try \"help\".\r\n(gdb) "] +[274.537938, "o", "f"] +[274.591956, "o", "r"] +[274.794531, "o", " "] +[275.013426, "o", "3"] +[275.213151, "o", "\r\n"] +[275.213508, "o", "#3 0xc14645d3 in entry_INT80_32 () at arch/x86/entry/entry_32.S:544\r\n"] +[275.213876, "o", "544\t\tcall\tdo_int80_syscall_32\r\n(gdb) "] +[280.62674, "o", "l"] +[280.794815, "o", "i"] +[280.926434, "o", "s"] +[281.156915, "o", "t"] +[281.246634, "o", " "] +[281.604719, "o", "5"] +[281.88764, "o", "3"] +[281.986378, "o", "2"] +[282.314826, "o", "\r\n"] +[282.319103, "o", "527\t * edx arg3\r\n528\t * esi arg4\r\n529\t * edi arg5\r\n530\t * ebp arg6\r\n531\t */\r\n532\tENTRY(entry_INT80_32)\r\n533\t\tASM_CLAC\r\n534\t\tpushl\t%eax\t\t\t/* pt_regs->orig_ax */\r\n535\t\tSAVE_ALL pt_regs_ax=$-ENOSYS\t/* save rest */\r\n536\t\r\n"] +[282.319396, "o", "(gdb) "] +[287.032723, "o", "$"] +[287.628779, "o", "\b\u001b[K"] +[287.896648, "o", "#"] +[288.026461, "o", " "] +[288.213827, "o", "l"] +[288.286875, "o", "e"] +[288.488417, "o", "t"] +[288.672395, "o", "s"] +[289.171884, "o", " "] +[289.288632, "o", "s"] +[289.468608, "o", "e"] +[289.602164, "o", "e"] +[289.762666, "o", " "] +[289.952819, "o", "w"] +[290.078185, "o", "h"] +[290.119588, "o", "a"] +[290.337222, "o", "t"] +[290.411464, "o", " "] +[291.504878, "o", "S"] +[291.584952, "o", "A"] +[291.754893, "o", "V"] +[291.802071, "o", "E"] +[291.96154, "o", "_"] +[292.14258, "o", "A"] +[292.21097, "o", "L"] +[292.344666, "o", "L"] +[292.56581, "o", " "] +[292.756512, "o", "d"] +[292.852271, "o", "o"] +[292.933196, "o", "e"] +[293.021413, "o", "s"] +[294.080447, "o", "\r\n"] +[294.080887, "o", "(gdb) "] +[295.025805, "o", "d"] +[295.136476, "o", "i"] +[295.271217, "o", "s"] +[295.37067, "o", "a"] +[295.527332, "o", "s"] +[295.898488, "o", "s"] +[296.085096, "o", "emble "] +[297.455021, "o", "\r\n"] +[297.455375, "o", "Dump of assembler code for function entry_INT80_32:\r\n 0xc14645a4 <+0>:\tlea 0x0(%esi),%esi\r\n 0xc14645a7 <+3>:\tpush %eax\r\n 0xc14645a8 
<+4>:\tcld \r\n 0xc14645a9 <+5>:\tpush $0x0\r\n 0xc14645ab <+7>:\tpush %fs\r\n 0xc14645ad <+9>:\tpush %es\r\n 0xc14645ae <+10>:\tpush %ds\r\n 0xc14645af <+11>:\tpush $0xffffffda\r\n 0xc14645b1 <+13>:\tpush %ebp\r\n"] +[297.455523, "o", " 0xc14645b2 <+14>:\tpush %edi\r\n 0xc14645b3 <+15>:\tpush %esi\r\n 0xc14645b4 <+16>:\tpush %edx\r\n 0xc14645b5 <+17>:\tpush %ecx\r\n 0xc14645b6 <+18>:\tpush %ebx\r\n"] +[297.455959, "o", " 0xc14645b7 <+19>:\tmov $0x7b,%edx\r\n 0xc14645bc <+24>:\tmov %edx,%ds\r\n 0xc14645be <+26>:\tmov %edx,%es\r\n"] +[297.456273, "o", " 0xc14645c0 <+28>:\tmov $0xd8,%edx\r\n 0xc14645c5 <+33>:\tmov %edx,%fs\r\n"] +[297.456519, "o", " 0xc14645c7 <+35>:\tcall 0xc1000ed3 \r\n 0xc14645cc <+40>:\tmov %esp,%eax\r\n 0xc14645ce <+42>:\tcall 0xc1001300 \r\n"] +[297.456787, "o", "---Type to continue, or q to quit---"] +[300.037654, "o", "q"] +[300.474906, "o", "\r\nQuit\r\n"] +[300.475028, "o", "(gdb) "] +[301.222036, "o", "#"] +[301.344949, "o", " "] +[301.53983, "o", "a"] +[301.639062, "o", "s"] +[301.804905, "o", " "] +[301.932574, "o", "e"] +[302.019082, "o", "x"] +[302.205597, "o", "p"] +[302.282764, "o", "e"] +[302.353223, "o", "c"] +[302.577703, "o", "t"] +[302.659955, "o", "e"] +[302.829647, "o", "d"] +[302.916048, "o", ","] +[302.988029, "o", " "] +[303.195687, "o", "i"] +[303.309352, "o", "t"] +[303.425016, "o", " "] +[304.041744, "o", "p"] +[304.269832, "o", "u"] +[304.428141, "o", "s"] +[304.641784, "o", "h"] +[304.756462, "o", "e"] +[304.840521, "o", "s"] +[305.137798, "o", " "] +[306.996152, "o", "r"] +[307.053783, "o", "e"] +[307.293212, "o", "s"] +[307.493748, "o", "i"] +[308.526057, "o", "\b\u001b[K"] +[308.651061, "o", "\b\u001b[K"] +[308.813776, "o", "\b\u001b[K"] +[309.52065, "o", "u"] +[309.734069, "o", "\b\u001b[K"] +[309.866326, "o", "\b\u001b[K"] +[310.014799, "o", "u"] +[310.293887, "o", "e"] +[310.409052, "o", "r"] +[310.764658, "o", "\b\u001b[K"] +[310.889594, "o", "\b\u001b[K"] +[311.047137, "o", "s"] +[311.173052, "o", "e"] 
+[311.228372, "o", "r"] +[311.711761, "o", "s"] +[311.809911, "o", "p"] +[311.873173, "o", "a"] +[311.988731, "o", "c"] +[312.077428, "o", "e"] +[312.161123, "o", " "] +[312.281191, "o", "r"] +[312.349935, "o", "e"] +[312.547146, "o", "g"] +[312.667304, "o", "s"] +[312.936388, "o", " "] +[313.086964, "o", "t"] +[313.154591, "o", "o"] +[313.232086, "o", " "] +[314.351931, "o", "s"] +[314.549992, "o", "t"] +[314.60986, "o", "a"] +[314.792887, "o", "c"] +[314.864733, "o", "k"] +[314.991827, "o", "\r\n"] +[314.992256, "o", "(gdb) "] +[327.831401, "o", "#"] +[328.116647, "o", " "] +[328.304643, "o", "l"] +[328.403419, "o", "e"] +[328.643693, "o", "t"] +[328.881425, "o", "s"] +[329.012271, "o", " "] +[329.350209, "o", "o"] +[329.675607, "o", "\b\u001b[K"] +[329.746522, "o", "g"] +[329.894035, "o", "o"] +[329.960528, "o", " "] +[330.265508, "o", "d"] +[330.454674, "o", "e"] +[330.61252, "o", "e"] +[330.767734, "o", "p"] +[331.017861, "o", "e"] +[331.105343, "o", "r"] +[331.193089, "o", ","] +[331.291487, "o", " "] +[331.421255, "o", "t"] +[331.499907, "o", "o"] +[331.589935, "o", " "] +[331.800311, "o", "u"] +[332.05827, "o", "s"] +[332.210451, "o", "e"] +[332.270701, "o", "r"] +[332.420355, "o", "s"] +[332.533886, "o", "p"] +[332.598023, "o", "a"] +[332.679367, "o", "c"] +[332.768566, "o", "e"] +[332.92981, "o", "\r\n"] +[332.929919, "o", "(gdb) "] +[333.474291, "o", "f"] +[333.512483, "o", "r"] +[333.670863, "o", " "] +[334.214986, "o", "2"] +[334.409422, "o", "\r\n"] +[334.410165, "o", "#2 do_int80_syscall_32 (regs=0xc7235fb4) at arch/x86/entry/common.c:341\r\n"] +[334.410451, "o", "341\t\tdo_syscall_32_irqs_on(regs);\r\n(gdb) "] +[334.795034, "o", "p"] +[334.935707, "o", "r"] +[335.041764, "o", "i"] +[335.125845, "o", "n"] +[335.256727, "o", "t"] +[335.436358, "o", " "] +[336.398283, "o", "*"] +[336.683485, "o", "r"] +[336.751145, "o", "e"] +[336.914056, "o", "g"] +[337.129438, "o", "s"] +[337.326378, "o", "\r\n"] +[337.327037, "o", "$3 = {bx = 3, cx = 1, dx = 3, "] 
+[337.327271, "o", "si = 168423920, di = 168419876, bp = 168419336, \r\n ax = 4294967258, ds = 123, __dsh = 0, es = 123, __esh = 0, fs = 0, \r\n"] +[337.327484, "o", " __fsh = 0, gs = 0, __gsh = 0, orig_ax = 63, ip = 1150252833, cs = 115, \r\n __csh = 0, flags = 514, sp = 3218117628, "] +[337.327641, "o", "ss = 123, __ssh = 0}\r\n(gdb) "] +[338.677993, "o", "#"] +[338.93667, "o", " "] +[339.41632, "o", "t"] +[339.584092, "o", "h"] +[340.119744, "o", "e"] +[340.257132, "o", " "] +[340.430186, "o", "p"] +[340.531965, "o", "t"] +[340.720155, "o", "_"] +[340.823874, "o", "r"] +[340.878238, "o", "e"] +[341.092385, "o", "g"] +[341.261419, "o", "s"] +[341.410487, "o", " "] +[341.687741, "o", "s"] +[341.898318, "o", "t"] +[341.974712, "o", "r"] +[342.103508, "o", "u"] +[342.220027, "o", "c"] +[342.426521, "o", "t"] +[342.533294, "o", "u"] +[342.643383, "o", "r"] +[342.706116, "o", "e"] +[342.969115, "o", " "] +[343.424403, "o", "s"] +[343.831852, "o", "a"] +[344.073427, "o", "v"] +[344.124191, "o", "e"] +[344.307738, "o", "s"] +[344.41903, "o", " "] +[344.562734, "o", "t"] +[344.695776, "o", "h"] +[344.763893, "o", "e"] +[344.926178, "o", " "] +[345.788445, "o", "E"] +[346.04075, "o", "S"] +[346.167767, "o", "P"] +[346.367919, "o", " "] +[346.480742, "o", "a"] +[346.612524, "o", "n"] +[346.708959, "o", "d"] +[346.772221, "o", " "] +[347.04116, "o", "E"] +[347.27069, "o", "I"] +[347.41795, "o", "P"] +[347.620023, "o", " "] +[348.267947, "o", "v"] +[348.310577, "o", "a"] +[348.389535, "o", "l"] +[348.583187, "o", "u"] +[348.626281, "o", "e"] +[348.75411, "o", "s"] +[349.152128, "o", " "] +[350.062966, "o", "\b\b\b\b\b\b\b"] +[350.505726, "o", "\u001b[1@r"] +[350.581222, "o", "\u001b[1@e"] +[350.733489, "o", "\u001b[1@g"] +[350.860972, "o", "\u001b[1@s"] +[351.063679, "o", "\u001b[1@ "] +[351.403168, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[351.723396, "o", "a"] +[351.796061, "o", "s"] +[351.924873, "o", " "] +[352.127877, "o", "w"] +[352.258955, 
"o", "e"] +[352.359039, "o", "l"] +[352.480885, "o", "l"] +[352.581941, "o", "\r\n"] +[352.582063, "o", "(gdb) "] +[354.291356, "o", "p"] +[354.434906, "o", "r"] +[354.548164, "o", "i"] +[354.623451, "o", "n"] +[354.677168, "o", "t"] +[354.835361, "o", " "] +[355.104827, "o", "/"] +[355.285227, "o", "x"] +[355.434379, "o", " "] +[355.814288, "o", "r"] +[355.884763, "o", "e"] +[356.044765, "o", "g"] +[356.169825, "o", "-"] +[356.229667, "o", "s"] +[356.715904, "o", ">"] +[356.92396, "o", "\b\u001b[K"] +[357.049325, "o", "\b\u001b[K"] +[357.20555, "o", "\b\u001b[K"] +[357.296469, "o", "s"] +[357.676925, "o", ">"] +[358.100747, "o", "i"] +[358.154056, "o", "p"] +[358.530121, "o", "\b\u001b[K"] +[358.650376, "o", "\b\u001b[K"] +[358.771888, "o", "\b\u001b[K"] +[358.830286, "o", "-"] +[359.096595, "o", ">"] +[359.399708, "o", "i"] +[359.471959, "o", "p"] +[359.66492, "o", "\r\n"] +[359.69065, "o", "$4 = 0x448f7721\r\n(gdb) "] +[362.750139, "o", "d"] +[362.873006, "o", "i"] +[363.70087, "o", "s"] +[364.005826, "o", "a"] +[364.154305, "o", "s"] +[364.534403, "o", "s"] +[364.638318, "o", "emble "] +[367.274254, "o", "0x"] +[367.274386, "o", "448f"] +[367.274769, "o", "7721"] +[368.070465, "o", "-"] +[368.275859, "o", "0"] +[368.34472, "o", "x"] +[368.717017, "o", "1"] +[368.721481, "o", "2"] +[368.831229, "o", ","] +[369.57988, "o", "+"] +[369.896806, "o", "0"] +[370.018088, "o", "x"] +[370.253572, "o", "1"] +[370.325077, "o", "2"] +[370.729392, "o", "\r\n"] +[370.729561, "o", "Dump of assembler code from 0x448f770f to 0x448f7721:\r\n"] +[370.730714, "o", " 0x448f770f:\tnop\r\n"] +[370.73105, "o", " 0x448f7710:\tmov %ebx,%edx\r\n"] +[370.731361, "o", " 0x448f7712:\tmov 0x8(%esp),%ecx\r\n"] +[370.731696, "o", " 0x448f7716:\tmov 0x4(%esp),%ebx\r\n"] +[370.732008, "o", " 0x448f771a:\tmov $0x3f,%eax\r\n"] +[370.732313, "o", " 0x448f771f:\tint $0x80\r\nEnd of assembler dump.\r\n"] +[370.732521, "o", "(gdb) "] +[374.541556, "o", "#"] +[374.783001, "o", " "] +[374.898373, "o", 
"t"] +[375.026925, "o", "h"] +[375.073567, "o", "i"] +[375.175876, "o", "s"] +[375.257213, "o", " "] +[375.437225, "o", "l"] +[375.612851, "o", "o"] +[375.726918, "o", "o"] +[375.811791, "o", "k"] +[375.914079, "o", "s"] +[375.991534, "o", " "] +[376.124911, "o", "l"] +[376.227571, "o", "i"] +[376.402924, "o", "k"] +[376.568604, "o", "e"] +[376.709076, "o", " "] +[376.892681, "o", "t"] +[377.0435, "o", "h"] +[377.153381, "o", "e"] +[377.255434, "o", " "] +[379.598554, "o", "d"] +[379.742728, "o", "u"] +[379.818192, "o", "p"] +[379.946914, "o", "2"] +[380.101209, "o", " "] +[381.094712, "o", "i"] +[381.254821, "o", "m"] +[381.448647, "o", "p"] +[381.516377, "o", "l"] +[381.632838, "o", "e"] +[381.733497, "o", "m"] +[381.821502, "o", "e"] +[381.939177, "o", "n"] +[382.061297, "o", "t"] +[382.126975, "o", "a"] +[382.263958, "o", "t"] +[382.355856, "o", "i"] +[382.39873, "o", "o"] +[382.625141, "o", "n"] +[382.785792, "o", " "] +[382.924201, "o", "i"] +[383.007739, "o", "n"] +[383.112653, "o", " "] +[383.344371, "o", "g"] +[383.528085, "o", "l"] +[383.818807, "o", "\b\u001b[K"] +[383.940934, "o", "\b\u001b[K"] +[384.549965, "o", "l"] +[384.699794, "o", "i"] +[384.893748, "o", "b"] +[384.958022, "o", "c"] +[385.106737, "o", "\r\n"] +[385.106862, "o", "(gdb) "] +[390.638525, "o", "#"] +[390.745393, "o", " "] +[390.899618, "o", "l"] +[390.932654, "o", "e"] +[391.101789, "o", "t"] +[391.253114, "o", "s"] +[391.385379, "o", " "] +[391.507941, "o", "c"] +[391.609023, "o", "h"] +[391.674385, "o", "e"] +[391.751088, "o", "c"] +[391.811, "o", "k"] +[391.917497, "o", " "] +[392.061073, "o", "t"] +[392.14249, "o", "h"] +[392.235203, "o", "e"] +[392.357752, "o", " "] +[392.997036, "o", "s"] +[393.16912, "o", "t"] +[393.264152, "o", "a"] +[393.476405, "o", "c"] +[393.623273, "o", "k"] +[393.735766, "o", " "] +[393.886348, "o", "v"] +[393.956001, "o", "a"] +[394.095124, "o", "l"] +[394.297637, "o", "u"] +[394.373537, "o", "e"] +[394.864079, "o", "s"] +[396.020466, "o", " "] 
+[396.544499, "o", "a"] +[397.19223, "o", "\b\u001b[K"] +[397.516516, "o", "("] +[397.757745, "o", "d"] +[397.840435, "o", "a"] +[398.021342, "o", "r"] +[398.083794, "o", "a"] +[398.395255, "o", "\b\u001b[K"] +[398.527905, "o", "\b\u001b[K"] +[398.655121, "o", "\b\u001b[K"] +[398.791444, "o", "\b\u001b[K"] +[400.259196, "o", "\b\u001b[K"] +[400.381224, "o", "\b\u001b[K"] +[400.705872, "o", "\r\n"] +[400.705994, "o", "(gdb) "] +[405.090136, "o", "p"] +[405.250017, "o", "r"] +[405.339117, "o", "i"] +[405.408649, "o", "n"] +[405.48463, "o", "t"] +[405.598488, "o", " "] +[406.982799, "o", "/"] +[407.074314, "o", "x"] +[407.190834, "o", " "] +[407.406215, "o", "r"] +[407.477829, "o", "e"] +[407.621752, "o", "g"] +[407.761064, "o", "s"] +[407.848706, "o", "-"] +[408.12336, "o", ">"] +[409.034397, "o", "s"] +[409.101141, "o", "p"] +[409.237828, "o", "\r\n"] +[409.250562, "o", "$5 = 0xbfd093fc\r\n(gdb) "] +[410.588712, "o", "x"] +[410.779534, "o", " "] +[410.929273, "o", "/"] +[411.084678, "o", "x"] +[411.282106, "o", " "] +[412.846629, "o", "0xb"] +[412.84701, "o", "fd093fc"] +[413.495368, "o", "\r\n"] +[413.495801, "o", "0xbfd093fc:\t0x08068b46\r\n"] +[413.495927, "o", "(gdb) "] +[413.970708, "o", "\r\n"] +[413.971563, "o", "0xbfd09400:\t0x00000003\r\n"] +[413.971895, "o", "(gdb) "] +[414.429768, "o", "\r\n"] +[414.430693, "o", "0xbfd09404:\t0x00000001\r\n"] +[414.430996, "o", "(gdb) "] +[416.367425, "o", "#"] +[416.600327, "o", " "] +[417.126384, "o", "f"] +[417.206421, "o", "i"] +[417.315777, "o", "r"] +[417.716293, "o", "s"] +[418.11331, "o", "t"] +[418.641803, "o", " "] +[419.136181, "o", "s"] +[419.31382, "o", "e"] +[419.466617, "o", "e"] +[420.57146, "o", "m"] +[421.230979, "o", "\b\u001b[K"] +[421.365032, "o", "\b\u001b[K"] +[421.493488, "o", "\b\u001b[K"] +[421.621835, "o", "\b\u001b[K"] +[421.857413, "o", "i"] +[421.945647, "o", "s"] +[422.057603, "o", " "] +[422.199844, "o", "t"] +[422.257072, "o", "h"] +[422.36931, "o", "e"] +[422.432441, "o", " "] 
+[423.25473, "o", "r"] +[423.329204, "o", "e"] +[423.486095, "o", "t"] +[423.556503, "o", "u"] +[423.658879, "o", "r"] +[423.740909, "o", "n"] +[423.811235, "o", " "] +[423.941111, "o", "a"] +[424.030245, "o", "d"] +[424.169883, "o", "d"] +[424.362463, "o", "r"] +[424.436531, "o", "e"] +[424.621395, "o", "s"] +[424.763605, "o", "s"] +[425.22778, "o", "\r\n"] +[425.228074, "o", "(gdb) "] +[426.180565, "o", "#"] +[426.329978, "o", " "] +[426.674846, "o", "s"] +[427.644279, "o", "e"] +[427.832117, "o", "c"] +[427.945399, "o", "o"] +[428.050368, "o", "n"] +[428.1215, "o", "d"] +[428.252667, "o", " "] +[428.383049, "o", "a"] +[428.500148, "o", "n"] +[428.596744, "o", "d"] +[428.698814, "o", " "] +[428.945383, "o", "r"] +[429.106316, "o", "i"] +[429.645192, "o", "\b\u001b[K"] +[429.767912, "o", "\b\u001b[K"] +[429.845124, "o", "t"] +[430.003863, "o", "h"] +[430.107219, "o", "i"] +[430.255689, "o", "r"] +[430.487167, "o", "d"] +[430.667477, "o", " "] +[431.006023, "o", "a"] +[431.211805, "o", "r"] +[431.292319, "o", "e"] +[431.437362, "o", " "] +[431.56393, "o", "t"] +[431.683208, "o", "h"] +[431.728348, "o", "e"] +[431.865432, "o", " "] +[432.888452, "o", "p"] +[432.977097, "o", "a"] +[433.175822, "o", "r"] +[433.258553, "o", "a"] +[433.531527, "o", "m"] +[434.072679, "o", "e"] +[434.235626, "o", "t"] +[434.331498, "o", "e"] +[434.456163, "o", "r"] +[434.694781, "o", "s"] +[434.879649, "o", " "] +[435.461291, "o", "("] +[435.855804, "o", "f"] +[435.932997, "o", "d"] +[436.744873, "o", "s"] +[436.988719, "o", " "] +[438.082246, "o", "3"] +[438.216148, "o", " "] +[438.350139, "o", "a"] +[438.461137, "o", "n"] +[438.546648, "o", "d"] +[438.645921, "o", " "] +[438.899168, "o", "1"] +[439.219735, "o", ")"] +[439.481293, "o", "\r\n"] +[439.481613, "o", "(gdb) "] +[450.102183, "o", "quit\r\n"] +[450.102822, "o", "A debugging session is active.\r\n\r\n\tInferior 1 [Remote target] will be detached.\r\n\r\nQuit anyway? 
(y or n) "] +[451.119252, "o", "y"] +[451.379225, "o", "\r\nDetaching from program: /home/tavi/src/linux/vmlinux, Remote target\r\n"] +[451.379742, "o", "Ending remote debugging.\r\n"] +[451.390975, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[451.925851, "o", "f"] +[452.122079, "o", "g"] +[452.20423, "o", "\r\n"] +[452.2046, "o", "minicom -D serial.pts\r\n"] +[452.204924, "o", "\u001b[!p\u001b[?3;4l\u001b[4l\u001b>\u001b[?1h\u001b=\u001b[1;1H\u001b[?12l\u001b[?25h\u001b[0m\u001b(B \u001b[2;1HWelcome to minicom 2.7 \u001b[3;1H \u001b[4;1HOPTIONS: I18n \u001b[5;1HCompiled on Feb 7 2016, 13:37:27. \u001b[6;1HPort serial.pts, 23:03:56 \u001b[7;1H \u001b[8;1HPress CTRL-A Z for help on special keys \u001b[9;1H \u001b[10;1H \u001b[11;1Hroot@qemux86:~# # trigger dup2 system call \u001b[12;1Hroot@qemux86:~# echo a > /"] +[452.205008, "o", "tmp/x \u001b[13;1H \u001b[14;1H \u001b[15;1H \u001b[16;1H \u001b[17;1H \u001b[18;1H \u001b[19;1H \u001b[20;1H \u001b[21;1H \u001b[22;1H \u001b[23;1H \u001b[24;1H\u001b[0m\u001b("] +[452.205404, "o", "B\u001b[7mCTRL-A Z for help | 115200 8N1 | NOR | Minicom 2.7 | VT102 | Offline | rial.pts\u001b[13;1H\u001b[?12l\u001b[?25h\u001b[24;1H\u001b[0m\u001b(B \u001b[13;1Hroot@qemux86:~# "] +[453.066691, "o", "\u001b[0m\u001b(B\u001b[7m\u001b[24;1H\u001b[K\u001b[?12l\u001b[?25h\u001b[?25lCTRL-A Z for help | 115200 8N1 | NOR | Minicom 2.7 | VT102 | Offline | rial.pts\u001b[?12l\u001b[?25h\u001b[13;17H"] +[453.274675, "o", "\u001b[8;30H\u001b[?25l\u001b[0m\u001b(B+----------------------+\u001b[9;30H| Leave Minicom? 
|\u001b[10;30H| No |\u001b[11;30H+----------------------+\u001b[10;51H\u001b[?25l\u001b[10;33H\u001b[0m\u001b(B\u001b[7m Yes "] +[453.425761, "o", "\u001b[?12l\u001b[?25h\u001b[8;1H\u001b[0m\u001b(BPress CTRL-A Z for help on special keys \u001b[9;1H \u001b[10;1H \u001b[11;1Hroot@qemux86:~# # trigger dup2 system call \u001b[13;17H\u001b[0m\u001b(B\u001b[7m\u001b[?12l\u001b[?25h"] +[453.426136, "o", "\u001b[?12l\u001b[?25h\u001b[0m\u001b(B\u001b[H\u001b[2J\u001b[?12l\u001b[?25h\u001b[?1l\u001b>\u001b[!p\u001b[?3;4l\u001b[4l\u001b>"] +[453.42742, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[453.929041, "o", "#"] +[454.48124, "o", " "] +[454.772523, "o", "t"] +[454.877335, "o", "h"] +[454.967191, "o", "e"] +[455.064771, "o", " "] +[455.221252, "o", "e"] +[455.36275, "o", "n"] +[455.48508, "o", "d"] +[456.337409, "o", "\r\n"] +[456.3382, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[457.092602, "o", "exit\r\n"] diff --git a/Documentation/teaching/lectures/syscalls-vdso.cast b/Documentation/teaching/lectures/syscalls-vdso.cast new file mode 100644 index 00000000000000..08d11b8fa39bd8 --- /dev/null +++ b/Documentation/teaching/lectures/syscalls-vdso.cast @@ -0,0 +1,299 @@ +{"title": "VDSO", "width": 80, "height": 24, "env": {"TERM": "xterm-256color", "SHELL": "/bin/bash"}, "timestamp": 1519704037, "version": 2, "idle_time_limit": 1.0} +[0.025954, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[0.778357, "o", "\r\u001b[12P(reverse-i-search)`':\u001b[C"] +[1.32861, "o", "\b\b\b\u001b[23@m': minicom -D serial.pts\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[1.448449, "o", "\b\b\b\b\b\b\b\b\b\u001b[1@i\u001b[C\u001b[C\u001b[C"] +[1.765225, "o", "\r\u001b]0;tavi@lktp: 
~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ minicom -D serial.pts \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r\n"] +[1.769204, "o", "\u001b[!p\u001b[?3;4l\u001b[4l\u001b>\u001b[0m\u001b(B"] +[1.769319, "o", "\u001b[?1h\u001b=\u001b[H\u001b[2J"] +[1.770766, "o", "\u001b[?12l\u001b[?25h"] +[1.770874, "o", "\nWelcome to minicom 2.7\r\n\nOPTIONS: I18n \r\nCompiled on Feb 7 2016, 13:37:27.\r\nPort serial.pts, 05:00:24\r\n\nPress CTRL-A Z for help on special keys\r\n\n"] +[2.416093, "o", "\n"] +[2.418403, "o", "root@qemux86:~# "] +[3.828624, "o", "\r(reverse-i-search)`': "] +[4.158346, "o", "\b\b\bc': cat /proc/$$/maps | grep vdso\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[4.248967, "o", "\b\b\b\b\b\b\b\b\b\b\b ': cat /proc/$$/maps | grep vdso \u001b[11;21Ha': "] +[4.827489, "o", "\r\u001b[P\u001b[P\u001b[P\u001b[P\u001b[P\u001b[P\u001b[P\u001b[Proot@qemux86:~# "] +[4.82862, "o", "\r\n"] +[4.879844, "o", "b7fe1000-b7fe2000 r-xp 00000000 00:00 0 [vdso]"] +[4.880004, "o", "\r\n"] +[4.881875, "o", "root@qemux86:~# "] +[6.142231, "o", "d"] +[6.284442, "o", "d"] +[6.458279, "o", " "] +[6.621989, "o", "i"] +[6.739944, "o", "f"] +[6.869664, "o", " "] +[7.493448, "o", "\b \b"] +[7.874738, "o", "="] +[8.588815, "o", "/"] +[8.637697, "o", "d"] +[8.738769, "o", "e"] +[8.938278, "o", "v"] +[8.984739, "o", "/"] +[9.500486, "o", "$"] +[9.669718, "o", "$"] +[9.933013, "o", "/"] +[10.500669, "o", "m"] +[10.616163, "o", "e"] +[10.693377, "o", "m"] +[10.978277, "o", " "] +[11.216909, "o", "o"] +[11.376459, "o", "f"] +[11.551348, "o", "="] +[12.560851, "o", "v"] +[12.764026, "o", "d"] +[12.85403, "o", "."] +[13.112242, "o", "s"] +[13.185422, "o", "o"] +[13.657544, "o", " "] +[15.232911, "o", "s"] +[15.352051, "o", "k"] +[15.564274, "o", "i"] +[15.669058, "o", "p"] +[16.316486, "o", "="] +[17.003593, "o", "$"] +[18.445474, "o", "("] +[18.601053, "o", "("] +[19.54437, "o", "0"] +[19.660685, "o", "x"] +[20.138912, "o", "b"] 
+[20.616486, "o", "f"] +[21.48522, "o", "\b \b"] +[22.515307, "o", "7"] +[23.508009, "o", "f"] +[23.790619, "o", "e"] +[24.400251, "o", "1"] +[25.248207, "o", ")"] +[25.375563, "o", ")"] +[26.327645, "o", " "] +[26.726994, "o", "c"] +[26.847455, "o", "o"] +[27.02857, "o", "u"] +[27.276381, "o", "n"] +[27.383646, "o", "t"] +[27.625756, "o", "="] +[27.920608, "o", "1"] +[28.521296, "o", " "] +[29.112339, "o", "b"] +[29.222891, "o", "s"] +[29.743661, "o", "="] +[30.384493, "o", "4"] +[30.485127, "o", "0"] +[30.695845, "o", "9"] +[31.075003, "o", "6"] +[32.188246, "o", "\r\n"] +[32.199355, "o", "dd: "] +[32.19954, "o", "failed to open '/dev/885/mem'"] +[32.199693, "o", ": No such file or directory"] +[32.199842, "o", "\r\n"] +[32.201711, "o", "root@qemux86:~# "] +[33.598384, "o", "dd if=/dev/$$/mem of=vd.so skip=$((0xb7fe1)) count=1 bs=4096"] +[34.061345, "o", "\b"] +[34.562175, "o", "\b"] +[34.593086, "o", "\b"] +[34.624228, "o", "\b"] +[34.655779, "o", "\b"] +[34.686158, "o", "\b"] +[34.718106, "o", "\b"] +[34.747207, "o", "\b"] +[34.777381, "o", "\b"] +[34.808345, "o", "\b"] +[34.838882, "o", "\b"] +[34.87023, "o", "\b"] +[34.901235, "o", "\b"] +[34.931911, "o", "\b"] +[34.962706, "o", "\b"] +[34.994145, "o", "\b"] +[35.025857, "o", "\b"] +[35.055689, "o", "\b"] +[35.086722, "o", "\b"] +[35.117042, "o", "\b"] +[35.148358, "o", "\b"] +[35.179121, "o", "\b"] +[35.210605, "o", "\b"] +[35.240516, "o", "\b"] +[35.271948, "o", "\b"] +[35.30293, "o", "\b"] +[35.334901, "o", "\b"] +[35.364321, "o", "\b"] +[35.396241, "o", "\b"] +[35.426037, "o", "\b"] +[35.456994, "o", "\b"] +[35.488153, "o", "\b"] +[35.519142, "o", "\b"] +[35.550182, "o", "\b"] +[35.581047, "o", "\b"] +[35.611116, "o", "\b"] +[35.641763, "o", "\b"] +[35.672908, "o", "\b"] +[35.703498, "o", "\b"] +[35.734998, "o", "\b"] +[35.766411, "o", "\b"] +[35.796549, "o", "\b"] +[35.82753, "o", "\b"] +[36.601742, "o", "\b"] +[37.09839, "o", "\b"] +[37.128155, "o", "\b"] +[37.163111, "o", "\b"] +[37.192346, "o", "\b"] 
+[37.223703, "o", "\b"] +[37.253359, "o", "\b"] +[37.284626, "o", "\b"] +[37.608855, "o", "v"] +[37.939538, "o", "\b\u001b[P"] +[38.087974, "o", "\b\u001b[P"] +[38.254754, "o", "\b\u001b[P"] +[38.440987, "o", " /$$/mem of=vd.so skip=$((0xb7fe1)) count=1 bs=4096 \u001b[15;24Hp"] +[38.558039, "o", " /$$/mem of=vd.so skip=$((0xb7fe1)) count=1 bs=4096 \u001b[15;25Hr"] +[38.691276, "o", " /$$/mem of=vd.so skip=$((0xb7fe1)) count=1 bs=4096 \u001b[15;26Ho"] +[38.847178, "o", " /$$/mem of=vd.so skip=$((0xb7fe1)) count=1 bs=4096 \u001b[15;27Hc"] +[39.508168, "o", "\r\n"] +[39.523093, "o", "dd: "] +[39.52328, "o", "/proc/885/mem: cannot skip to specified offset"] +[39.523317, "o", "\r\n"] +[39.52466, "o", "1+0 records in"] +[39.524744, "o", "\r\n"] +[39.524825, "o", "1+0 records out"] +[39.525076, "o", "\r\n"] +[39.525484, "o", "4096 bytes (4.1 kB, 4.0 KiB) copied, 0.00261601 s, 1.6 MB/s"] +[39.525575, "o", "\r\n"] +[39.527474, "o", "root@qemux86:~# "] +[41.610174, "o", "n"] +[41.732264, "o", "m"] +[41.988107, "o", " "] +[42.33858, "o", "-"] +[42.79556, "o", "D"] +[43.104068, "o", " "] +[43.291478, "o", "v"] +[43.55666, "o", "d"] +[43.843116, "o", "."] +[44.185221, "o", "s"] +[44.542652, "o", "o"] +[45.448387, "o", "\r\n"] +[45.491518, "o", "00000000 A LINUX_2.5\r\n"] +[45.49171, "o", "00000000 A LINUX_2.6"] +[45.491843, "o", "\r\n"] +[45.491964, "o", "00000b4c T __kernel_rt_sigreturn"] +[45.492082, "o", "\r\n"] +[45.492159, "o", "00000b40 T __kernel_sigreturn"] +[45.492224, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[45.492353, "o", "00000b2c T __kernel_vsyscall"] +[45.492598, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[45.49281, "o", "00000710 T __vdso_clock_gettime"] +[45.492887, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[45.493019, "o", "000009a0 T __vdso_gettimeofday"] +[45.493104, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[45.493252, "o", "00000b00 T __vdso_time"] +[45.493323, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[45.495654, "o", "root@qemux86:~# "] +[47.899977, "o", "o"] 
+[48.346053, "o", "b"] +[48.401345, "o", "j"] +[48.663622, "o", "d"] +[48.966338, "o", "u"] +[49.273586, "o", "m"] +[49.392373, "o", "p"] +[49.52031, "o", " "] +[49.715854, "o", "-"] +[50.017434, "o", "d"] +[50.916108, "o", "r"] +[51.076094, "o", " "] +[51.470914, "o", ">"] +[51.659245, "o", " "] +[52.330585, "o", "v"] +[52.682659, "o", "d"] +[53.100659, "o", "s"] +[53.21442, "o", "o"] +[53.522123, "o", "."] +[55.809864, "o", "s"] +[57.590678, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[57.633021, "o", "objdump: "] +[57.633213, "o", "'a.out': No such file"] +[57.633325, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[57.635624, "o", "root@qemux86:~# "] +[59.8835, "o", "objdump -dr > vdso.s"] +[60.157193, "o", "\b"] +[60.660172, "o", "\b"] +[60.689762, "o", "\b"] +[60.722033, "o", "\b"] +[60.752467, "o", "\b"] +[60.783825, "o", "\b"] +[60.813211, "o", "\b"] +[60.845191, "o", "\b"] +[60.875427, "o", "\b"] +[60.905546, "o", "\b"] +[61.100984, "o", "r"] +[61.244992, "o", " "] +[61.660944, "o", " > vdso.s \u001b[24;29Hv"] +[61.996232, "o", " > vdso.s \u001b[24;30Hd"] +[62.139048, "o", " > vdso.s \u001b[24;31H."] +[62.350791, "o", " > vdso.s \u001b[24;32Hs"] +[62.450021, "o", " > vdso.s \u001b[24;33Ho"] +[62.598679, "o", " > vdso.s \u001b[24;34H "] +[62.933485, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[62.974853, "o", "root@qemux86:~# "] +[66.634044, "o", "v"] +[66.745211, "o", "i"] +[66.942786, "o", " "] +[67.232609, "o", "v"] +[67.824023, "o", "d"] +[68.377726, "o", "s"] +[68.437403, "o", "o"] +[68.723205, "o", "."] +[69.712646, "o", "s"] +[69.919804, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[69.956533, "o", "\u001b[1;1H\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[1;1H\u001b[K\nvd.so: file format elf32-i386\u001b[5;1HDisassembly of section .text:\u001b[7;1H000006d0 
<__vdso_clock_gettime@@LINUX_2.6-0x40>:\u001b[8;2H6d0: 55 push %ebp\u001b[9;2H6d1: 89 e5 mov %esp,%ebp\u001b[10;2H6"] +[69.956753, "o", "d3: 53 push %ebx\u001b[11;2H6d4: e8 49 04 00 00 call b22 <__vdso_time@@LINUX_2.6+0x22>\u001b[12;2H6d9: 81 c3 6b fc ff ff add $0xfffffc6b,%ebx\u001b[13;2H6df: 0f ae e8 lfence\u001b[14;2H6e2: 0f 31 rdtsc\u001b[15;2H6e4: 89 c1 mov %eax,%ecx\u001b[16;2H6e6: 8b 83 44 cd ff ff mov -0x32bc(%ebx),%eax\u001b[17;2H6ec: 8b 9b 48 cd ff ff mov -0x32b8(%ebx),%ebx\u001b[18;2H6f2: 39 d3 cmp %edx,%ebx\u001b[19;2H6f4: 72 0e jb 704 \u001b[20;2H6f6: 76 08 jbe 700 \u001b[21;2H6f8: 89 da mov %ebx,%edx\u001b[22;2H6fa: 5b pop %ebx\u001b[23;2H6fb: 5d pop %ebp\u001b[1;1H\u001b[24;1H\u001b[K- vdso.s 1/413 0%\u001b[1;1H"] +[70.48049, "o", "\u001b[24;1H\u001b[K/"] +[71.703477, "o", "v"] +[71.956563, "o", "s"] +[72.061476, "o", "y"] +[72.151126, "o", "s"] +[72.345804, "o", "c"] +[72.43538, "o", "a"] +[72.532243, "o", "l"] +[72.655082, "o", "l"] +[72.868018, "o", "\u001b[1;1H\u001b[1;2H82e: 3d ff c9 9a 3b cmp $0x3b9ac9ff,%eax\u001b[2;1H 833: 77 eb ja 820 <__vdso_cloc"] +[72.868164, "o", "k_gettime@@LINUX_2.6+0x11\u001b[2;80H\u001b[3;2H835: 8b 75 f0 mov -0x10(%ebp),%esi\u001b[4;2H838: 01 0f add %ecx,(%edi)\u001b[5;1H 83a: 89 47 04 mov %eax,0x4(%edi)\u001b[6;2H83d: 85 f6 test %esi,%esi"] +[72.868249, "o", "\u001b[7;1H 83f: 0f 85 24 ff ff ff jne 769 <__vdso_clock_gettime@@LINUX_2.6+0x59\u001b[7;80H\u001b[8;2H845: 89 f9 mov "] +[72.86854, "o", " %edi,%ecx\u001b[9;2H847: b8 09 01 00 00 mov $0x109,%eax\u001b[10;2H84c: 89 da mov %ebx,%edx\u001b[11;2H84e: 8b 5d 08 mov 0x8(%ebp),%ebx \u001b[12;2H851: e8 d6 02 00 00 call b2c <__kernel_vsyscall@@LINUX_2.5>\u001b[13;2H856: 89 d3 mov %edx,%ebx\u001b[14;2H858: 83 c4 10 add $0x10,%esp\u001b[15;2H85b: 5b pop %ebx \u001b[16;2H85c: 5e pop %esi "] +[72.868973, "o", "\u001b[17;2H85d: 5f pop %edi \u001b[18;2H85e: 5d pop %ebp \u001b[19;2H85f: c3 ret \u001b[20;2H860: 8b 45 08 mov 0x8(%ebp),%eax \u001b[21;2H863: 85 c0 test 
%eax,%ea\u001b[22;2H865: 75 de jne 845 <__vdso_clock_gettime@@LINUX_2.6+0x13\u001b[22;80H\u001b[23;2H867: 89 7d 0c mov %edi,0xc(%ebp)\u001b[12;54H\u001b[24;1H\u001b[K- vdso.s 135/413 32%\u001b[12;54H"] +[73.659739, "o", "\u001b[24;1H\u001b[K/"] +[73.751232, "o", "\u001b[12;54H\u001b[1;2Hac2: e9 fe fe ff ff jmp 9c5 <__vdso_gettimeofday@@LINUX_2.6+0x25>\u001b[1;80H\u001b[2;2Hac7: 8b 97 84 cd ff ff mov -0x327c(%edi),%edx \u001b[2;80H\u001b[3;2Hacd: 8b 5d 0c mov 0xc(%ebp),%ebx \u001b[4;2Had0: 89 13 mov %edx,(%ebx\u001b[5;2Had2: 8b 97 88 cd ff ff mov -0x3278(%edi),%edx\u001b[6;2Had8: 89 53 04 "] +[73.751392, "o", " mov %edx,0x4(%ebx)\u001b[7;2Hadb: eb 95 jmp a72 <__vdso_gettimeofday@@LINUX_2.6+0xd2>\u001b[7;80H\u001b[8;2Hadd: b8 4e 00 00 00 mov $0x4e,%eax\u001b[9;2Hae2: 8b 4d 0c mov 0xc(%ebp),%ecx\u001b[10;2Hae5\u001b[11;2Hae7: 89 f3 mov %esi,%ebx \u001b[12;2Hae9: e8 3e 00\u001b[13;2Haee\u001b[14;2Haf0: e9 7d ff ff ff jmp a72 <__vdso_gettimeofday@@LINUX_2.6+0xd2>\u001b[14;80H\u001b[15;2Haf5: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi\u001b[16;2Haf9: 8d bc 27 00 00 00 00 lea 0x0(%edi,%eiz,1),%edi\u001b[17;2H \u001b[18;1H00000b00 <__vdso_time@@LINUX_2.6>: \u001b[19;2Hb00: 55 push %ebp\u001b[20;2Hb01: e8 18 00 00 00 call b1e <__vdso_time@@LINUX_2.6+0x1e>\u001b[21;2Hb06: 05 3e f8 ff ff add $0xfffff83e,%eax\u001b[22;2Hb0b: 89 e5 mov %esp,%ebp \u001b[22;80H\u001b[23;2Hb0d: 8b 55 08 mov 0x8(%ebp),%edx\u001b[12;54H\u001b[24;1H\u001b[K- vd"] +[73.751422, "o", "so.s 351/413 84%\u001b[12;54H"] +[74.638384, "o", "\u001b[24;1H\u001b[K/"] +[74.706985, "o", "\u001b[12;54H\u001b[1;2Hb1c: 5d pop %ebp \u001b[1;80H\u001b[2;2Hb1d: c3 ret \u001b[3;2Hb1e: 8b 04 24 mov (%esp),%eax \u001b[4;2Hb21: c3 ret "] +[74.707435, "o", " \u001b[5;2Hb22: 8b 1c 24 mov (%esp),%ebx \u001b[6;2Hb25: c3 ret \u001b[7;2Hb26: 8b 3c 24 mov (%esp),%edi \u001b[7;80H\u001b[8;2Hb29: c3 ret \u001b[9;2Hb2a: 90 nop \u001b[10;2Hb2b: 90 nop \u001b[11;2H \u001b[12;1H00000b2c <__kernel_vsyscall@@LINUX_2.5>: \u001b[13;2Hb2c: 51 
push %ecx \u001b[14;2Hb2d: 52 push %edx \u001b[14;80H\u001b[15;2Hb2e: 55 push %ebp \u001b[16;2Hb2f: 89 e5 mov %esp,%ebp \u001b[17;2Hb31: 0f 34 sysenter\u001b[18;1H b33: cd 80 int $0x80"] +[74.707709, "o", "\u001b[19;3H35: 5d pop \u001b[20;3H36: 5a pop %edx \u001b[21;3H37: 59 pop %ecx \u001b[22;3H38: c3 ret \u001b[23;3H39: 90 nop \u001b[12;20H\u001b[24;1H\u001b[K- vdso.s 378/413 91%\u001b[12;20H"] +[75.741371, "o", "\u001b[13;20H\u001b[24;1H\u001b[K- vdso.s 379/413 91%\u001b[13;20H"] +[76.240657, "o", "\u001b[14;20H\u001b[24;1H\u001b[K- vdso.s 380/413 92%\u001b[14;20H"] +[76.271822, "o", "\u001b[15;20H\u001b[24;1H\u001b[K- vdso.s 381/413 92%\u001b[15;20H"] +[76.303272, "o", "\u001b[16;20H\u001b[24;1H\u001b[K- vdso.s 382/413 92%\u001b[16;20H"] +[76.335376, "o", "\u001b[17;20H\u001b[24;1H\u001b[K- vdso.s 383/413 92%\u001b[17;20H"] +[76.365685, "o", "\u001b[18;20H\u001b[24;1H\u001b[K- vdso.s 384/413 92%\u001b[18;20H"] +[76.396024, "o", "\u001b[19;20H\u001b[24;1H\u001b[K- vdso.s 385/413 93%\u001b[19;20H"] +[76.426024, "o", "\u001b[20;20H\u001b[24;1H\u001b[K- vdso.s 386/413 93%\u001b[20;20H"] +[76.633377, "o", "\u001b[21;20H\u001b[24;1H\u001b[K- vdso.s 387/413 93%\u001b[21;20H"] +[76.801144, "o", "\u001b[22;20H\u001b[24;1H\u001b[K- vdso.s 388/413 93%\u001b[22;20H"] +[76.95044, "o", "\u001b[23;20H\u001b[24;1H\u001b[K- vdso.s 389/413 94%\u001b[23;20H"] +[77.101372, "o", "\u001b[1;4Hd: c3 ret \u001b[2;4He: 8b 04 24 mov (%esp),%eax\u001b[3;3H21: c3 ret \u001b[4;4H2: 8b 1c 24 mov (%esp),%ebx\u001b[5;4H5: c3 ret "] +[77.10181, "o", " \u001b[6;4H6: 8b 3c 24 mov (%esp),%edi\u001b[7;4H9: c3 ret \u001b[8;4Ha: 90 nop\u001b[9;4Hb\u001b[10;2H \u001b[11;1H00000b2c <__kernel_vsyscall@@LINUX_2.5>:\u001b[12;1H b2c: 51 push %ecx\u001b[13;4Hd: 52 push %ed\u001b[14;4He: 55 push %ebp\u001b[15;4Hf: 89 e5 mov %esp,%ebp\u001b[16;3H31: 0f 34 sysenter \u001b[17;4H3: cd 80 int $0x80\u001b[18;4H5: 5d pop %ebp \u001b[19;4H6: 5a pop %edx\u001b[20;4H7: 59 pop %ec\u001b[21;4H8: c3 ret \u001b[22;4H9: 90 
nop\u001b[23;4Ha\u001b[23;20H\u001b[24;1H\u001b[K- vdso.s 390/413 94%\u001b[23;20H"] +[77.455803, "o", "\u001b[1;4He: 8b 04 24 mov (%esp),%eax\u001b[2;3H21: c3 ret \u001b[3;4H2: 8b 1c 24 mov (%esp),%ebx\u001b[4;4H5: c3 ret \u001b[5;4H6: 8b 3c 24 mov (%esp),%edi\u001b[6;4H9: c3 ret \u001b[7;4Ha: 90 nop\u001b[8;4Hb\u001b[9;2H \u001b[10;1H00000b2c <__kernel_vsyscall@@LINUX_2.5>:\u001b[11;1H b2c: 51 push %ecx\u001b[12;4H"] +[77.455926, "o", "d: 52 push %ed\u001b[13;4He: 55 push %ebp\u001b[14;4Hf: 89 e5 mov %esp,%ebp\u001b[15;3H31: 0f 34 sysenter \u001b[16;4H3: cd 80 int $0x80\u001b[17;4H5: 5d pop %ebp \u001b[18;4H6: 5a pop %edx\u001b[19;4H7: 59 pop %ec\u001b[20;4H8: c3 ret \u001b[21;4H9: 90 nop\u001b[22;4Ha\u001b[23;4Hb\u001b[23;20H\u001b[24;1H\u001b[K- vdso.s 391/413 94%\u001b[23;20H"] +[85.076246, "o", "\u0007\u001b[24;1H\u001b[K- vdso.s 391/413 94%\u001b[23;20H"] +[85.088716, "o", "\u001b[24;1H\u001b[K:"] +[85.733973, "o", "q"] +[86.200246, "o", "!"] +[86.579774, "o", "\u001b[23;20H\u001b[24;1H\u001b[K- vdso.s 391/413 94%\u001b[23;20H"] +[86.580204, "o", "\u001b[24;1H\u001b[K"] +[86.586893, "o", "root@qemux86:~# "] +[87.180212, "o", "\u001b[0m\u001b(B\u001b[7m\r\u001b[K\u001b[?12l\u001b[?25h"] +[87.180527, "o", "\u001b[?25lCTRL-A Z for help | 115200 8N1 | NOR | Minicom 2.7 | VT102 | Offline | rial.pts\u001b[?12l\u001b[?25h\u001b[24;17H"] +[87.375774, "o", "\u001b[8;30H\u001b[?25l\u001b[0m\u001b(B+----------------------+\u001b[9;30H| Leave Minicom? 
|\u001b[10;30H| No |\u001b[11;30H+----------------------+\u001b[10;51H\u001b[?25l\u001b[10;33H\u001b[0m\u001b(B\u001b[7m Yes "] +[87.691546, "o", "\u001b[?12l\u001b[?25h\u001b[8;1H\u001b[0m\u001b(B b2b: 90 nop \u001b[9;1H \u001b[10;1H00000b2c <__kernel_vsyscall@@LINUX_2.5>: \u001b[11;1H b2c: 51 push %ecx \u001b[24;17H\u001b[0m\u001b(B\u001b[7m"] +[87.691675, "o", "\u001b[?12l\u001b[?25h"] +[87.691981, "o", "\u001b[?12l\u001b[?25h\u001b[0m\u001b(B\u001b[H\u001b[2J\u001b[?12l\u001b[?25h\u001b[?1l\u001b>\u001b[!p\u001b[?3;4l\u001b[4l\u001b>"] +[87.69329, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[88.615113, "o", "exit\r\n"] diff --git a/Documentation/teaching/lectures/syscalls.rst b/Documentation/teaching/lectures/syscalls.rst new file mode 100644 index 00000000000000..30859835e0eef6 --- /dev/null +++ b/Documentation/teaching/lectures/syscalls.rst @@ -0,0 +1,609 @@ +============ +System Calls +============ + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +.. slide:: System Calls + :inline-contents: True + :level: 2 + + * Linux system calls implementation + + * VDSO and virtual syscalls + + * Accessing user space from system calls + + + +Linux system calls implementation +================================= + +At a high level system calls are "services" offered by the kernel to +user applications and they resemble library APIs in that they are +described as a function call with a name, parameters and return value. + +.. slide:: System Calls as Kernel services + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + +-------------+ +-------------+ + | Application | | Application | + +-------------+ +-------------+ + | | + |read(fd, buff, len) |fork() + | | + v v + +---------------------------------------+ + | Kernel | + +---------------------------------------+ + + +However, on a closer look, we can see that system calls are actually +not function calls, but specific assembly instructions (architecture +and kernel specific) that do the following: + +.. slide:: System Call Setup + :inline-contents: True + :level: 2 + + * setup information to identify the system call and its parameters + + * trigger a kernel mode switch + + * retrieve the result of the system call + +In Linux, system calls are identified by numbers and the parameters +for system calls are machine word sized (32 or 64 bit). There can be a +maximum of 6 system call parameters. Both the system call number and +the parameters are stored in certain registers. + +For example, on 32bit x86 architecture, the system call identifier is +stored in the EAX register, while parameters in registers EBX, ECX, +EDX, ESI, EDI, EBP. + +.. slide:: Linux system call setup + :inline-contents: False + :level: 2 + + * System calls are identified by numbers + + * The parameters for system calls are machine word sized (32 or 64 + bit) and they are limited to a maximum of 6 + + * Uses registers to store them both (e.g. for 32bit x86: EAX for + system call and EBX, ECX, EDX, ESI, EDI, EBP for parameters) + +System libraries (e.g. libc) offer functions that implement the +actual system calls in order to make it easier for applications to use +them. + +When a user to kernel mode transition occurs, the execution flow is +interrupted and it is transferred to a kernel entry point. This is +similar to how interrupts and exceptions are handled (in fact on some +architectures this transition happens as a result of an exception).
+ +The system call entry point will save registers (which contain values +from user space, including system call number and system call +parameters) on the stack and then it will continue with executing the +system call dispatcher. + +.. note:: During the user - kernel mode transition the stack is also + switched from the user stack to the kernel stack. This is + explained in more detail in the interrupts lecture. + +.. slide:: Example of Linux system call setup and handling + :inline-contents: True + :level: 2 + + .. ditaa:: + + +-------------+ dup2 +-----------------------------+ + | Application |-----+ | libc | + +-------------+ | | | + +---->| C7590 dup2: | + | ... | + | C7592 movl 0x8(%esp),%ecx | + | C7596 movl 0x4(%esp),%ebx | + | C759a movl $0x3f,%eax | + +------------------------------+ C759f int $0x80 | + | | ... +<-----+ + | +-----------------------------+ | + | | + | | + | | + | | + | +------------------------------------------------------------+ | + | | Kernel | | + | | | | + +--->|ENTRY(entry_INT80_32) | | + | ASM_CLAC | | + | pushl %eax # pt_regs->orig_ax | | + | SAVE_ALL pt_regs_ax=$-ENOSYS # save rest | | + | ... | | + | movl %esp, %eax | | + | call do_int80_syscall_32 | | + | .... | | + | RESTORE_REGS 4 # skip orig_eax/error_code | | + | ... | | + | INTERRUPT_RETURN +-+ + +------------------------------------------------------------+ + + +The purpose of the system call dispatcher is to verify the system call +number and run the kernel function associated with the system call. + +.. slide:: Linux System Call Dispatcher + :inline-contents: True + :level: 2 + + ..
code-block:: c + + /* Handles int $0x80 */ + __visible void do_int80_syscall_32(struct pt_regs *regs) + { + enter_from_user_mode(); + local_irq_enable(); + do_syscall_32_irqs_on(regs); + } + + /* simplified version of the Linux x86 32bit System Call Dispatcher */ + static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) + { + unsigned int nr = regs->orig_ax; + + if (nr < IA32_NR_syscalls) + regs->ax = ia32_sys_call_table[nr](regs->bx, regs->cx, + regs->dx, regs->si, + regs->di, regs->bp); + syscall_return_slowpath(regs); + } + + + +To demonstrate the system call flow we are going to use the virtual +machine setup, attach gdb to a running kernel, add a breakpoint to the +dup2 system call and inspect the state. + +.. slide:: Inspecting dup2 system call + :inline-contents: True + :level: 2 + + |_| + + .. asciicast:: syscalls-inspection.cast + + +In summary, this is what happens during a system call: + +.. slide:: System Call Flow Summary + :inline-contents: True + :level: 2 + + * The application is setting up the system call number and + parameters and it issues a trap instruction + + * The execution mode switches from user to kernel; the CPU switches + to a kernel stack; the user stack and the return address to user + space is saved on the kernel stack + + * The kernel entry point saves registers on the kernel stack + + * The system call dispatcher identifies the system call function + and runs it + + * The user space registers are restored and execution is switched + back to user (e.g. calling IRET) + + * The user space application resumes + + +System call table +----------------- + +The system call table is what the system call dispatcher uses to map +system call numbers to kernel functions: + +.. slide:: System Call Table + :inline-contents: True + :level: 2 + + .. code-block:: c + + #define __SYSCALL_I386(nr, sym, qual) [nr] = sym, + + const sys_call_ptr_t ia32_sys_call_table[] = { + [0 ... 
__NR_syscall_compat_max] = &sys_ni_syscall, + #include + }; + + .. code-block:: c + + __SYSCALL_I386(0, sys_restart_syscall, ) + __SYSCALL_I386(1, sys_exit, ) + #ifdef CONFIG_X86_32 + __SYSCALL_I386(2, sys_fork, ) + #else + __SYSCALL_I386(2, sys_fork, ) + #endif + __SYSCALL_I386(3, sys_read, ) + __SYSCALL_I386(4, sys_write, ) + + + +System call parameters handling +------------------------------- + +Handling system call parameters is tricky. Since these values are +setup by user space, the kernel can not assume correctness and must +always verify them thoroughly. + +Pointers have a few important special cases that must be checked: + +.. slide:: System Calls Pointer Parameters + :inline-contents: True + :level: 2 + + * Never allow pointers to kernel-space + + * Check for invalid pointers + + +Since system calls are executed in kernel mode, they have access to +kernel space and if pointers are not properly checked user +applications might get read or write access to kernel space. + +For example, let's consider the case where such a check is not made for +the read or write system calls. If the user passes a kernel-space +pointer to a write system call then it can get access to kernel data +by later reading the file. If it passes a kernel-space pointer to a +read system call then it can corrupt kernel memory. + + +.. slide:: Pointers to Kernel Space + :level: 2 + + * User access to kernel data if allowed in a write system call + + * User corrupting kernel data if allowed in a read system call + + +Likewise, if a pointer passed by the application is invalid +(e.g. unmapped, read-only for cases where it is used for writing), it +could "crash" the kernel. There are two approaches that could be used: + +..
slide:: Invalid pointers handling approaches + :inline-contents: True + :level: 2 + + * Check the pointer against the user address space before using it, + or + + * Avoid checking the pointer and rely on the MMU to detect when the + pointer is invalid and use the page fault handler to determine + that the pointer was invalid + + +Although it sounds tempting, the second approach is not that easy to +implement. The page fault handler uses the fault address (the address +that was accessed), the faulting address (the address of the +instruction that did the access) and information from the user address +space to determine the cause: + +.. slide:: Page fault handling + :inline-contents: True + :level: 2 + + * Copy on write, demand paging, swapping: both the fault and + faulting addresses are in user space; the fault address is + valid (checked against the user address space) + + * Invalid pointer used in system call: the faulting address is + in kernel space; the fault address is in user space and it is + invalid + + * Kernel bug (kernel accesses invalid pointer): same as above + +But in the last two cases we don't have enough information to +determine the cause of the fault. + +In order to solve this issue Linux uses special APIs (e.g. +:c:func:`copy_to_user`) to access user space that are specially +crafted: + +.. slide:: Marking kernel code that accesses user space + :inline-contents: True + :level: 2 + + * The exact instructions that access user space are recorded in a + table (exception table) + + * When a page fault occurs the faulting address is checked against + this table + + +Although the fault handling case may be more costly overall depending +on the address space vs exception table size, and it is more complex, +it is optimized for the common case and that is why it is preferred +and used in Linux. + + +..
slide:: Cost analysis for pointer checks vs fault handling + :inline-contents: True + :level: 2 + + +------------------+-----------------------+------------------------+ + | Cost | Pointer checks | Fault handling | + +==================+=======================+========================+ + | Valid address | address space search | negligible | + +------------------+-----------------------+------------------------+ + | Invalid address | address space search | exception table search | + +------------------+-----------------------+------------------------+ + + +Virtual Dynamic Shared Object (VDSO) +==================================== + +The VDSO mechanism was born out of the necessity of optimizing the +system call implementation, in a way that does not impact libc with +having to track the CPU capabilities in conjunction with the kernel +version. + +For example: x86 has two ways of issuing system calls: int 0x80 and +sysenter. The latter is significantly faster so it should be used when +available. However, it is only available for processors newer than +Pentium II and only for kernel versions greater than 2.6. + +With VDSO the system call interface is decided by the kernel: + +.. slide:: Virtual Dynamic Shared Object (VDSO) + :inline-contents: True + :level: 2 + + * a stream of instructions to issue the system call is generated by + the kernel in a special memory area (formatted as an ELF shared + object) + + * that memory area is mapped towards the end of the user address + space + + * libc searches for VDSO and if present will use it to issue the + system call + + +.. slide:: Inspecting VDSO + :inline-contents: True + :level: 2 + + |_| + + .. asciicast:: syscalls-vdso.cast + + + +An interesting development of the VDSO are the virtual system calls +(vsyscalls) which run directly from user space.
These vsyscalls are +also part of VDSO and they are accessing data from the VDSO page that +is either static or modified by the kernel in a separate read-write +map of the VDSO page. Examples of system calls that can be implemented +as vsyscalls are: getpid or gettimeofday. + + +.. slide:: Virtual System Calls (vsyscalls) + :inline-contents: True + :level: 2 + + * "System calls" that run directly from user space, part of the VDSO + + * Static data (e.g. getpid()) + + * Dynamic data updated by the kernel in a RW map of the VDSO + (e.g. gettimeofday(), time()) + + +Accessing user space from system calls +===================================== + +As we mentioned earlier, user space must be accessed with special APIs +(:c:func:`get_user`, :c:func:`put_user`, :c:func:`copy_from_user`, +:c:func:`copy_to_user`) that check whether the pointer is in user space +and also handle the fault if the pointer is invalid. In case of invalid +pointers they return a non-zero value. + +.. slide:: Accessing user space from system calls + :inline-contents: True + :level: 2 + + .. code-block:: c + + /* OK: return -EFAULT if user_ptr is invalid */ + if (copy_from_user(&kernel_buffer, user_ptr, size)) + return -EFAULT; + + /* NOK: only works if user_ptr is valid otherwise crashes kernel */ + memcpy(&kernel_buffer, user_ptr, size); + + +Let's examine the simplest API, get_user, as implemented for x86: + +.. slide:: get_user implementation + :inline-contents: True + :level: 2 + + ..
code-block:: c + + #define get_user(x, ptr) \ + ({ \ + int __ret_gu; \ + register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ + __chk_user_ptr(ptr); \ + might_fault(); \ + asm volatile("call __get_user_%P4" \ + : "=a" (__ret_gu), "=r" (__val_gu), \ + ASM_CALL_CONSTRAINT \ + : "0" (ptr), "i" (sizeof(*(ptr)))); \ + (x) = (__force __typeof__(*(ptr))) __val_gu; \ + __builtin_expect(__ret_gu, 0); \ + }) + + +The implementation uses inline assembly, that allows inserting ASM +sequences in C code and also handles access to / from variables in the +ASM code. + +Based on the type size of the x variable, one of __get_user_1, +__get_user_2 or __get_user_4 will be called. Also, before executing +the assembly call, ptr will be moved to the first register EAX while +after the completion of assembly part the value of EAX will be moved +to __ret_gu and the EDX register will be moved to __val_gu. + +It is equivalent to the following pseudo code: + + +.. slide:: get_user pseudo code + :inline-contents: True + :level: 2 + + .. code-block:: c + + #define get_user(x, ptr) \ + movl ptr, %eax \ + call __get_user_1 \ + movl %edx, x \ + movl %eax, result \ + + + +The __get_user_1 implementation for x86 is the following: + +.. slide:: get_user_1 implementation + :inline-contents: True + :level: 2 + + .. code-block:: none + + .text + ENTRY(__get_user_1) + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user + ASM_STAC + 1: movzbl (%_ASM_AX),%edx + xor %eax,%eax + ASM_CLAC + ret + ENDPROC(__get_user_1) + + bad_get_user: + xor %edx,%edx + mov $(-EFAULT),%_ASM_AX + ASM_CLAC + ret + END(bad_get_user) + + _ASM_EXTABLE(1b,bad_get_user) + +The first two statements check the pointer (which is stored in EAX) +against the addr_limit field of the current task (process) descriptor to +make sure that we don't have a pointer to kernel space.
+ +Then, SMAP is disabled, to allow access to user from kernel, and the +access to user space is done with the instruction at the 1: label. EAX +is then zeroed to mark success, SMAP is enabled, and the call returns. + +The movzbl instruction is the one that does the access to user space +and its address is captured with the 1: label and stored in a special +section: + +.. slide:: Exception table entry + :inline-contents: True + :level: 2 + + .. code-block:: c + + /* Exception table entry */ + # define _ASM_EXTABLE_HANDLE(from, to, handler) \ + .pushsection "__ex_table","a" ; \ + .balign 4 ; \ + .long (from) - . ; \ + .long (to) - . ; \ + .long (handler) - . ; \ + .popsection + + # define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + + +For each address that accesses user space we have an entry in the +exception table, that is made up of: the faulting address(from), where +to jump to in case of a fault, and a handler function (that implements +the jump logic). All of these addresses are stored on 32bit in +relative format to the exception table, so that they work for both 32 +and 64 bit kernels. + + +All of the exception table entries are then collected in the +__ex_table section by the linker script: + +.. slide:: Exception table building + :inline-contents: True + :level: 2 + + .. code-block:: c + + #define EXCEPTION_TABLE(align) \ + . = ALIGN(align); \ + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start___ex_table) = .; \ + KEEP(*(__ex_table)) \ + VMLINUX_SYMBOL(__stop___ex_table) = .; \ + } + + +The section is guarded with __start___ex_table and __stop___ex_table +symbols, so that it is easy to find the data from C code. This table +is accessed by the fault handler: + + +.. slide:: Exception table handling + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + bool ex_handler_default(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) + { + regs->ip = ex_fixup_addr(fixup); + return true; + } + + int fixup_exception(struct pt_regs *regs, int trapnr) + { + const struct exception_table_entry *e; + ex_handler_t handler; + + e = search_exception_tables(regs->ip); + if (!e) + return 0; + + handler = ex_fixup_handler(e); + return handler(e, regs, trapnr); + } + + +All it does is to set the return address to the one in the to field of +the exception table entry which, in case of the get_user exception +table entry, is bad_get_user which return -EFAULT to the caller. + diff --git a/Makefile b/Makefile index 69fa5c0310d834..7328fc4b825cb4 100644 --- a/Makefile +++ b/Makefile @@ -224,7 +224,7 @@ clean-targets := %clean mrproper cleandocs no-dot-config-targets := $(clean-targets) \ cscope gtags TAGS tags help% %docs check% coccicheck \ $(version_h) headers_% archheaders archscripts \ - %asm-generic kernelversion %src-pkg + %asm-generic kernelversion %slides %src-pkg no-sync-config-targets := $(no-dot-config-targets) install %install \ kernelrelease @@ -1473,7 +1473,7 @@ $(help-board-dirs): help-%: # Documentation targets # --------------------------------------------------------------------------- DOC_TARGETS := xmldocs latexdocs pdfdocs htmldocs epubdocs cleandocs \ - linkcheckdocs dochelp refcheckdocs + linkcheckdocs dochelp refcheckdocs slides PHONY += $(DOC_TARGETS) $(DOC_TARGETS): scripts_basic FORCE $(Q)$(MAKE) $(build)=Documentation $@ diff --git a/README.rst b/README.rst new file mode 120000 index 00000000000000..67226b289b4cc2 --- /dev/null +++ b/README.rst @@ -0,0 +1 @@ +Documentation/teaching/index.rst \ No newline at end of file diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ddfa3f24204c71..d0932b46fe1b8f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1206,9 +1206,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct 
receive_queue *rq, break; } while (rq->vq->num_free); if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { + local_bh_disable(); u64_stats_update_begin(&rq->stats.syncp); rq->stats.kicks++; u64_stats_update_end(&rq->stats.syncp); + local_bh_enable(); } return !oom; diff --git a/tools/labs/.gitignore b/tools/labs/.gitignore new file mode 100644 index 00000000000000..0f4ec23c13ab1e --- /dev/null +++ b/tools/labs/.gitignore @@ -0,0 +1,10 @@ +/skels/ +vmlinux +zImage +serial.pts +rootfs.img +disk1.img +disk2.img +/*core-image-*.ext4 +.modinst +/out/ diff --git a/tools/labs/Makefile b/tools/labs/Makefile new file mode 100644 index 00000000000000..d6d07bdfbda76b --- /dev/null +++ b/tools/labs/Makefile @@ -0,0 +1,51 @@ +KDIR ?= $(shell realpath $(PWD)/../..) + +LABS ?= $(shell cd templates && find -mindepth 1 -maxdepth 1 -type d) +MODS = $(shell cd templates && find $(LABS) -mindepth 1 -name Kbuild | xargs dirname) +TODO ?= 0 + +include qemu/Makefile + +skels: + mkdir -p skels + cd templates && find $(LABS) -type f | xargs ./generate_skels.py --output ../skels --todo $(TODO) + rm -f skels/Kbuild + +skels/Kbuild: + echo "# autogenerated, do not edit " > $@ + echo "ccflags-y += -Wno-unused-function -Wno-unused-label -Wno-unused-variable " >> $@ + for i in $(shell cd skels && find -mindepth 1 -name Kbuild | xargs --no-run-if-empty dirname); do echo "obj-m += $$i/" >> $@; done + +build: $(KCONFIG) skels/Kbuild + $(MAKE) -C $(KDIR) M=$(KDIR)/tools/labs/skels ARCH=$(ARCH) modules + for i in $(shell find skels -name Makefile | xargs --no-run-if-empty dirname); do $(MAKE) -C $$i; done + +TEMPDIR := $(shell mktemp -u) + +copy: $(YOCTO_IMAGE) + if [ -e qemu.mon ]; then exit 1; fi + mkdir $(TEMPDIR) + sudo mount -t ext4 -o loop $(YOCTO_IMAGE) $(TEMPDIR) + find skels -type f \( -name *.ko -or -executable \) | xargs --no-run-if-empty sudo cp --parents -t $(TEMPDIR)/home/root || true + find skels -type d \( -name checker \) | xargs --no-run-if-empty sudo cp -r --parents -t 
$(TEMPDIR)/home/root || true + sudo umount $(TEMPDIR) + rmdir $(TEMPDIR) + +docker-docs: + cd docs && docker-compose build + cd docs && docker-compose run docs-build bash -c "cd /linux/tools/labs && make docs" + +docs: + $(MAKE) -C $(KDIR) DOCBOOKS= SPHINXDIRS="teaching" htmldocs + $(MAKE) -C $(KDIR) BUILDDIR=$(KDIR)/Documentation/output/slides DOCBOOKS= SPHINXDIRS="teaching" slides + for i in $(KDIR)/Documentation/output/slides/teaching/lectures/*.html; do name=$$(basename $$i .html); cp $$i $(KDIR)/Documentation/output/teaching/lectures/$$name-slides.html; done + cp -r $(KDIR)/Documentation/output/slides/teaching/_static $(KDIR)/Documentation/output/teaching/ + +clean:: + $(MAKE) -C $(KDIR) M=$(KDIR)/tools/labs/skels ARCH=$(ARCH) clean + for i in $(shell find skels -name Makefile | xargs --no-run-if-empty dirname); do $(MAKE) -C $$i clean; done + +clean_skels: + rm -rf skels + +.PHONY: skels build copy docs docker-docs clean clean_skels diff --git a/tools/labs/Makefile.vmchecker b/tools/labs/Makefile.vmchecker new file mode 100644 index 00000000000000..9a9ada27392b35 --- /dev/null +++ b/tools/labs/Makefile.vmchecker @@ -0,0 +1,38 @@ +KDIR = /home/so2/vm/linux + +include Makefile + +$(YOCTO_IMAGE): clean-slate.$(YOCTO_IMAGE) + cp clean-slate.$(YOCTO_IMAGE) $(YOCTO_IMAGE) + +setup: $(YOCTO_IMAGE) + mkdir -p out + +copy: + if [ -e qemu.mon ]; then exit 1; fi + mkdir $(TEMPDIR) + sudo mount -t ext4 -o loop $(YOCTO_IMAGE) $(TEMPDIR) + find skels -type f \( -name *.ko -or -executable \) | xargs sudo cp -t $(TEMPDIR)/home/root || true + sudo cp -r skels/assignments/*/checker/* -t $(TEMPDIR)/home/root || true + sudo umount $(TEMPDIR) + rmdir $(TEMPDIR) + +extract: + if [ -e qemu.mon ]; then exit 1; fi + mkdir $(TEMPDIR) + sudo mount -t ext4 -o loop $(YOCTO_IMAGE) $(TEMPDIR) + sudo cp $(TEMPDIR)/home/root/run-stdout.vmr out/ && sudo chown so2:so2 out/run-stdout.vmr || true + sudo cp $(TEMPDIR)/home/root/run-stderr.vmr out/ && sudo chown so2:so2 out/run-stderr.vmr || true 
+ sudo umount $(TEMPDIR) + rmdir $(TEMPDIR) + test -f out/run-stdout.vmr || echo "No testing output (likely due to submission errors)." > out/run-stdout.vmr + test -f out/run-stderr.vmr || echo "No testing output (likely due to submission errors)." > out/run-stderr.vmr + +postprocess: + sed -i '/^Linux version /,/^netconsole: network logging started/d' out/run-km.vmr + +destroy: + -rm -rf out + -rm -f $(YOCTO_IMAGE) + +.PHONY: setup copy extract postprocess destroy diff --git a/tools/labs/docs/Dockerfile b/tools/labs/docs/Dockerfile new file mode 100644 index 00000000000000..294331cecdbece --- /dev/null +++ b/tools/labs/docs/Dockerfile @@ -0,0 +1,26 @@ +FROM ubuntu:18.04 + +RUN apt-get update +RUN apt-get install -y software-properties-common +RUN apt-get install -y sudo +RUN apt-get install -y make +RUN apt-get install -y git +RUN apt-get install -y python +RUN apt-get install -y python-pip +RUN apt-get install -y ditaa +RUN apt-get install -y graphviz +RUN pip install Sphinx==1.6.7 sphinx_rtd_theme hieroglyph==1.0 +# append new packages here, to minimize docker rebuild time +RUN rm -rf /var/lib/apt/lists/* + +RUN useradd -ms /bin/bash ubuntu && adduser ubuntu sudo && echo -n 'ubuntu:ubuntu' | chpasswd + +# Enable passwordless sudo for users under the "sudo" group +RUN sed -i.bkp -e \ + 's/%sudo\s\+ALL=(ALL\(:ALL\)\?)\s\+ALL/%sudo ALL=NOPASSWD:ALL/g' \ + /etc/sudoers + +USER ubuntu +WORKDIR /home/ubuntu/ + +ENV PATH ${PATH}:/home/ubuntu/.local diff --git a/tools/labs/docs/docker-compose.yml b/tools/labs/docs/docker-compose.yml new file mode 100644 index 00000000000000..db2f22bbfe61ec --- /dev/null +++ b/tools/labs/docs/docker-compose.yml @@ -0,0 +1,12 @@ +version: '3' +services: + docs-build: + build: + context: . 
+ dockerfile: Dockerfile + volumes: + - ../../../:/linux + environment: + # workaround for binfmt_misc support in containers + - SPHINX_DITAA_CMD=jexec + - SPHINX_DITAA_ARG=/usr/bin/ditaa diff --git a/tools/labs/qemu/Makefile b/tools/labs/qemu/Makefile new file mode 100644 index 00000000000000..a9b644eebb3c6f --- /dev/null +++ b/tools/labs/qemu/Makefile @@ -0,0 +1,92 @@ +QEMU_DISPLAY ?= none +ARCH ?= x86 +ifeq ($(ARCH),x86) +b = b +endif + +ZIMAGE = $(KDIR)/arch/$(ARCH)/boot/$(b)zImage +KCONFIG = $(KDIR)/.config +NTTCP = $(KDIR)/tools/labs/templates/assignments/6-e100/nttcp + +YOCTO_URL = http://downloads.yoctoproject.org/releases/yocto/yocto-2.3/machines/qemu/qemu$(ARCH)/ +YOCTO_IMAGE = core-image-minimal-qemu$(ARCH).ext4 +#YOCTO_IMAGE = core-image-minimal-dev-qemu$(ARCH).ext4 +#YOCTO_IMAGE = core-image-sato-dev-qemu$(ARCH).ext4 +#YOCTO_IMAGE = core-image-sato-qemu$(ARCH).ext4 +#YOCTO_IMAGE = core-image-sato-sdk-qemu$(ARCH).ext4 + +QEMU_OPTS = -kernel $(ZIMAGE) \ + -device virtio-serial \ + -chardev pty,id=virtiocon0 -device virtconsole,chardev=virtiocon0 \ + -serial pipe:pipe1 -serial pipe:pipe2 \ + -netdev tap,id=tap0,ifname=tap0,script=no,downscript=no -net nic,netdev=tap0,model=virtio \ + -netdev tap,id=tap1,ifname=tap1,script=no,downscript=no -net nic,netdev=tap1,model=i82559er \ + -drive file=$(YOCTO_IMAGE),if=virtio,format=raw \ + -drive file=disk1.img,if=virtio,format=raw \ + -drive file=disk2.img,if=virtio,format=raw \ + --append "root=/dev/vda loglevel=15 console=hvc0" \ + --display $(QEMU_DISPLAY) -s -m 256 + +boot: .modinst tap0 tap1 pipe1.in pipe1.out pipe2.in pipe2.out disk1.img disk2.img nttcp-run + ARCH=$(ARCH) qemu/qemu.sh $(QEMU_OPTS) + +TEMPDIR := $(shell mktemp -u) + +$(KCONFIG): qemu/kernel_config.x86 + cp $^ $@ + $(MAKE) -C $(KDIR) oldnoconfig + +zImage: $(ZIMAGE) + +$(ZIMAGE): $(KCONFIG) + $(MAKE) -C $(KDIR) + $(MAKE) -C $(KDIR) modules + +.modinst: $(ZIMAGE) $(YOCTO_IMAGE) + mkdir $(TEMPDIR) + sudo mount -t ext4 -o loop $(YOCTO_IMAGE) 
$(TEMPDIR) + sudo $(MAKE) -C $(KDIR) modules_install INSTALL_MOD_PATH=$(TEMPDIR) + sudo umount $(TEMPDIR) + rmdir $(TEMPDIR) + sleep 1 && touch .modinst + +gdb: $(ZIMAGE) + gdb -ex "target remote localhost:1234" $(KDIR)/vmlinux + +$(YOCTO_IMAGE): + wget $(YOCTO_URL)/$(YOCTO_IMAGE) + sudo qemu/prepare-image.sh $(YOCTO_IMAGE) + +tap0: + qemu/create_net.sh $@ + +tap1: + qemu/create_net.sh $@ + +nttcp-run: $(NTTCP) tap1 + $(NTTCP) -v -i & + +pipe1.in: + mkfifo $@ + +pipe1.out: + mkfifo $@ + +pipe2.in: pipe1.out + ln $< $@ + +pipe2.out: pipe1.in + ln $< $@ + +disk1.img: + qemu-img create -f raw $@ 100M + +disk2.img: + qemu-img create -f raw $@ 100M + +clean:: + -rm -f .modinst + -rm -f disk1.img disk2.img + -rm -f pipe1.in pipe1.out pipe2.in pipe2.out + +.PHONY: boot gdb clean tap0 tap1 diff --git a/tools/labs/qemu/create_net.sh b/tools/labs/qemu/create_net.sh new file mode 100755 index 00000000000000..f215ad8700079d --- /dev/null +++ b/tools/labs/qemu/create_net.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +if test $# -ne 1; then + echo "Usage: $0 " 1>&2 + echo " must be tap0 or tap1" + exit 1 +fi + +device=$1 + +USER=$(whoami) + +case "$device" in + "tap0") + subnet=172.213.0 + ;; + "tap1") + subnet=172.30.0 + ;; + *) + echo "Unknown device" 1>&2 + exit 1 + ;; +esac + +# If device doesn't exist add device. +if ! /sbin/ip link show dev "$device" > /dev/null 2>&1; then + sudo ip tuntap add mode tap user "$USER" dev "$device" +fi + +# Reconfigure just to be sure (even if device exists). 
+sudo /sbin/ip address flush dev "$device" +sudo /sbin/ip link set dev "$device" down +sudo /sbin/ip address add $subnet.1/24 dev "$device" +sudo /sbin/ip link set dev "$device" up + +mkdir -p $PWD/tftp + +sudo dnsmasq --enable-tftp --tftp-root=$PWD/tftp --no-resolv --no-hosts --bind-interfaces --interface $device -F $subnet.2,$subnet.20 -x dnsmasq.pid || true diff --git a/tools/labs/qemu/kernel_config.x86 b/tools/labs/qemu/kernel_config.x86 new file mode 100644 index 00000000000000..0259ae6541f3f9 --- /dev/null +++ b/tools/labs/qemu/kernel_config.x86 @@ -0,0 +1,84 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_CROSS_MEMORY_ATTACH is not set +# CONFIG_USELIB is not set +CONFIG_BLK_DEV_INITRD=y +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +# CONFIG_64BIT is not set +CONFIG_SMP=y +# CONFIG_X86_EXTENDED_PLATFORM is not set +# CONFIG_SCHED_OMIT_FRAME_POINTER is not set +# CONFIG_X86_MCE is not set +# CONFIG_MICROCODE is not set +# CONFIG_SECCOMP is not set +# CONFIG_RELOCATABLE is not set +# CONFIG_SUSPEND is not set +# CONFIG_VIRTUALIZATION is not set +CONFIG_KPROBES=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_BINFMT_AOUT=y +CONFIG_BINFMT_MISC=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_NETFILTER=y +# CONFIG_UEVENT_HELPER is not set +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_VIRTIO_BLK=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=m +CONFIG_VIRTIO_NET=y +# CONFIG_DEVMEM is not set +CONFIG_VIRTIO_CONSOLE=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_HID_A4TECH is not set +# CONFIG_HID_APPLE is not set +# CONFIG_HID_BELKIN is not set +# CONFIG_HID_CHERRY is not set +# CONFIG_HID_CHICONY is not set +# CONFIG_HID_CYPRESS is not set +# CONFIG_HID_EZKEY is not set +# CONFIG_HID_KENSINGTON is not set +# CONFIG_HID_LOGITECH is not set +# CONFIG_HID_MICROSOFT is not set +# CONFIG_HID_MONTEREY is not set 
+# CONFIG_USB_SUPPORT is not set +CONFIG_VIRTIO_PCI=y +# CONFIG_X86_PLATFORM_DEVICES is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_ENCRYPTION=y +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY_USER is not set +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_CONFIGFS_FS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_CRYPTO_ECHAINIV=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_GDB_SCRIPTS=y +# CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_SLAB=y +CONFIG_DEBUG_SLAB_LEAK=y +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=1024 +CONFIG_PROVE_LOCKING=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +# CONFIG_FTRACE is not set +# CONFIG_X86_VERBOSE_BOOTUP is not set +# CONFIG_X86_DEBUG_FPU is not set diff --git a/tools/labs/qemu/prepare-image.sh b/tools/labs/qemu/prepare-image.sh new file mode 100755 index 00000000000000..459fa749baf2e8 --- /dev/null +++ b/tools/labs/qemu/prepare-image.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +size=$(stat -c%s $1) +if [ $size -lt 50000000 ]; then + e2fsck -f $1 + resize2fs $1 50M +fi + +TMP=$(mktemp -d) + +mount -t ext4 -o loop $1 $TMP + +# add console +echo "hvc0:12345:respawn:/sbin/getty 115200 hvc0" >> $TMP/etc/inittab + +# add more vty +cat >> $TMP/etc/inittab <> $TMP/etc/network/interfaces + +sudo umount $TMP +rmdir $TMP diff --git a/tools/labs/qemu/qemu.sh b/tools/labs/qemu/qemu.sh new file mode 100755 index 00000000000000..4735f227ae2d1b --- /dev/null +++ b/tools/labs/qemu/qemu.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# +# This script runs qemu and creates a symbolic link named serial.pts +# to the qemu serial console (pts based). 
Because the qemu pts +# allocation is dynamic, it is preferable to have a stable path to +# avoid visual inspection of the qemu output when connecting to the +# serial console. + +case $ARCH in + x86) + qemu=qemu-system-i386 + ;; + arm) + qemu=qemu-system-arm + ;; +esac + +echo info chardev | nc -U -l qemu.mon | egrep --line-buffered -o "/dev/pts/[0-9]*" | xargs -I PTS ln -fs PTS serial.pts & +$qemu "$@" -monitor unix:qemu.mon +rm qemu.mon +rm serial.pts diff --git a/tools/labs/scripts/install-startup-script b/tools/labs/scripts/install-startup-script new file mode 100755 index 00000000000000..4df78826c1d98c --- /dev/null +++ b/tools/labs/scripts/install-startup-script @@ -0,0 +1,34 @@ +#!/bin/bash + +# Sample run: +# sudo ./install-startup-script ../clean-slate.core-image-minimal-qemux86.ext4 + +startup_script="so2-startup-script" + +if test $UID -ne 0; then + echo "You must be root to run this script." 1>&2 + exit 1 +fi + +if test $# -ne 1; then + echo "Usage: path/to/rootfs/image" 1>&2 + exit 1 +fi + +rootfs_image="$1" + +mkdir tmp_mnt +mount -t ext4 -o loop "$rootfs_image" tmp_mnt + +cp "$startup_script" tmp_mnt/etc/init.d/so2 +pushd tmp_mnt/etc/ > /dev/null 2>&1 +for i in rc*.d; do + cd "$i" + pwd + ln -sfn ../init.d/so2 S99so2 + cd .. +done +popd > /dev/null 2>&1 + +umount tmp_mnt +rmdir tmp_mnt diff --git a/tools/labs/scripts/so2-startup-script b/tools/labs/scripts/so2-startup-script new file mode 100755 index 00000000000000..3335210da90a7c --- /dev/null +++ b/tools/labs/scripts/so2-startup-script @@ -0,0 +1,36 @@ +#!/bin/sh + +echo "SO2 checker starts ..." > /dev/hvc0 +echo "SO2 checker starts ..." > /so2-checker.debug + +ip a a dev eth0 172.213.0.7/24 +ip l set dev eth0 up +sleep 1 + +modprobe netconsole netconsole=6666@172.213.0.7/eth0,6666@172.213.0.1/ + +dmesg -n 8 +dmesg -c /dev/null 2>&1 +sleep 3 + +echo "Starting tests ..." > /dev/hvc0 +echo "Starting tests ..." 
>> /so2-checker.debug +date > /dev/hvc0 +date >> /so2-checker.debug + +cd /home/root +/bin/sh ./run.sh > /dev/hvc0 + +echo "Testing complete." > /dev/hvc0 +echo "Testing complete." >> /so2-checker.debug +date > /dev/hvc0 +date >> /so2-checker.debug + +sleep 5 +rmmod netconsole + +sleep 3 +echo "All done. Shutting down ..." > /dev/hvc0 +echo "All done. Shutting down ..." >> /so2-checker.debug + +poweroff diff --git a/tools/labs/scripts/vmchecker-setup b/tools/labs/scripts/vmchecker-setup new file mode 100755 index 00000000000000..787b4a8f489d3c --- /dev/null +++ b/tools/labs/scripts/vmchecker-setup @@ -0,0 +1,12 @@ +#!/bin/bash + +if test ! -f ../initial.clean-slate.core-image-minimal-qemux86.ext4; then + wget http://downloads.yoctoproject.org/releases/yocto/yocto-2.3/machines/qemu/qemux86/core-image-minimal-qemux86.ext4 -O ../initial.clean-slate.core-image-minimal-qemux86.ext4 +fi +if test ! -f ../clean-slate.core-image-minimal-qemux86.ext4; then + cp ../initial.clean-slate.core-image-minimal-qemux86.ext4 ../clean-slate.core-image-minimal-qemux86.ext4 + sudo ./install-startup-script ../clean-slate.core-image-minimal-qemux86.ext4 +fi +if test ! -d ../out/; then + mkdir ../out/ +fi diff --git a/tools/labs/templates/api_assignment/checker/list-checker b/tools/labs/templates/api_assignment/checker/list-checker new file mode 100755 index 00000000000000..a57f7785b310cd --- /dev/null +++ b/tools/labs/templates/api_assignment/checker/list-checker @@ -0,0 +1,775 @@ +#!/bin/sh + +# +# List kernel API checker +# +# + +# Enable/disable debug (1/0). +DEBUG_=1 + +DEBUG() +{ + if test "x$DEBUG_" = "x1"; then + $@ 1>&2 + fi +} + +max_points=90 + +# Enable/disable exiting when program fails. 
+EXIT_IF_FAIL=0 + +test_do_fail() +{ + points=$1 + printf "failed [ 0/%02d]\n" "$max_points" + if test "x$EXIT_IF_FAIL" = "x1"; then + exit 1 + fi +} + +test_do_pass() +{ + points=$1 + printf "passed [%02d/%02d]\n" "$points" "$max_points" +} + +basic_test() +{ + message=$1 + points=$2 + shift 2 + test_command=$@ + + printf "%s" "$message" + + i=0 + limit=$((60 - ${#message})) + while test "$i" -lt "$limit"; do + printf "." + i=$(($i+1)) + done + + $test_command > /dev/null 2>&1 + if test $? -eq 0; then + test_do_pass "$points" + else + test_do_fail "$points" + fi +} + +module="list" +module_file="$module".ko +proc_folder="/proc/list" +preview="$proc_folder/preview" +management="$proc_folder/management" + +init_test() +{ + dmesg -c > /dev/null 2>&1 + insmod "$module_file" + if test $? -ne 0; then + echo "Error inserting module." 1>&2 + exit 1 + fi + sleep 1 +} + +cleanup_test() +{ + rmmod "$module" +} + +test_module_exists() +{ + init_test + + lsmod 2> /dev/null | grep -w list > /dev/null 2>&1 + basic_test "module_exists" 1 test "$?" -eq 0 + + cleanup_test +} + +test_proc_folder_exits() +{ + init_test + + basic_test "folder_exists" 1 test -d "$proc_folder" + + cleanup_test +} + +test_preview_exits() +{ + init_test + + basic_test "preview_exists" 1 test -f "$preview" + + cleanup_test +} + +test_management_exits() +{ + init_test + + basic_test "management_exists" 1 test -f "$management" + + cleanup_test +} + +test_preview_is_readable() +{ + init_test + + cat "$preview" > /dev/null 2>&1 + basic_test "preview_is_readable" 1 test $? -eq 0 + + cleanup_test +} + +test_preview_is_not_writable() +{ + init_test + + echo "hello" 2>&1 > "$preview" | grep "Input/output error" > /dev/null + basic_test "preview_is_not_writable" 1 test $? -eq 0 + + cleanup_test +} + +test_management_is_writable() +{ + init_test + + echo "hello" 2>&1 > "$management" | grep "Input/output error" > /dev/null + basic_test "management_is_writable" 1 test $? 
-ne 0 + + cleanup_test +} + +test_management_is_not_readable() +{ + init_test + + cat "$management" > /dev/null 2>&1 + basic_test "management_is_not_readable" 1 test $? -ne 0 + + cleanup_test +} + +test_addf_no_error() +{ + init_test + + echo "addf lorem" > "$management" 2> /dev/null + basic_test "addf_no_error" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_print() +{ + init_test + + echo "addf lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "addf_print" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_print_once() +{ + init_test + + echo "addf lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "addf_print_once" 2 test "$no" -eq 1 + + cleanup_test +} + +test_adde_no_error() +{ + init_test + + echo "adde lorem" > "$management" 2> /dev/null + basic_test "adde_no_error" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_print() +{ + init_test + + echo "adde lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "adde_print" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_print_once() +{ + init_test + + echo "adde lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "adde_print_once" 2 test "$no" -eq 1 + + cleanup_test +} + +test_addf_two_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "addf_two_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_addf_two_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + cat "$preview" | grep "lorem" > /dev/null + ret1=$? + cat "$preview" | grep "ipsum" > /dev/null + ret2=$? 
+ basic_test "addf_two_content" 2 test "$ret1" -eq 0 -a "$ret2" -eq 0 + + cleanup_test +} + +test_addf_same_twice_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "addf_same_twice_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_addf_same_twice_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "addf_same_twice_content" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_same_twice_content_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "addf_same_twice_content_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_adde_two_number() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "adde_two_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_adde_two_content() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde ipsum" > "$management" + cat "$preview" | grep "lorem" > /dev/null + ret1=$? + cat "$preview" | grep "ipsum" > /dev/null + ret2=$? + basic_test "adde_two_content" 2 test "$ret1" -eq 0 -a "$ret2" -eq 0 + + cleanup_test +} + +test_adde_same_twice_number() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "adde_same_twice_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_adde_same_twice_content() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "adde_same_twice_content" 2 test $? 
-eq 0 + + cleanup_test +} + +test_adde_same_twice_content_number() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "adde_same_twice_content_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_addf_front_after_addf() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + + # "ipsum" must be first. + cat "$preview" | head -n 1 | grep "ipsum" > /dev/null + basic_test "addf_front_after_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_front_after_adde() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + + # "ipsum" must be first. + cat "$preview" | head -n 1 | grep "ipsum" > /dev/null + basic_test "addf_front_after_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_front_after_adde_addf() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + + # "dolor" must be first. + cat "$preview" | head -n 1 | grep "dolor" > /dev/null + basic_test "addf_front_after_adde_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_front_after_addf_adde() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "addf dolor" > "$management" + + # "dolor" must be first. + cat "$preview" | head -n 1 | grep "dolor" > /dev/null + basic_test "addf_front_after_addf_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_addf() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + + # "ipsum" must be last. + cat "$preview" | tail -n -1 | grep "ipsum" > /dev/null + basic_test "adde_end_after_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_adde() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde ipsum" > "$management" + + # "ipsum" must be last. 
+ cat "$preview" | tail -n -1 | grep "ipsum" > /dev/null + basic_test "adde_end_after_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_adde_addf() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "adde dolor" > "$management" + + # "dolor" must be last. + cat "$preview" | tail -n -1 | grep "dolor" > /dev/null + basic_test "adde_end_after_adde_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_addf_adde() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "adde dolor" > "$management" + + # "dolor" must be last. + cat "$preview" | tail -n -1 | grep "dolor" > /dev/null + basic_test "adde_end_after_addf_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_delf_no_error() +{ + init_test + + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + basic_test "delf_no_error" 2 test $? -eq 0 + + cleanup_test +} + +test_delf_removes_front() +{ + init_test + + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "delf_removes_front" 2 test "$no" -eq 0 + + cleanup_test +} + +test_delf_removes_end() +{ + init_test + + echo "adde lorem" > "$management" + echo "delf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "delf_removes_end" 2 test "$no" -eq 0 + + cleanup_test +} + +test_delf_front_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "delf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "delf_front_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_delf_front_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "delf lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "delf_front_content" 2 test $?
-ne 0 + + cleanup_test +} + +test_delf_first() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "delf ipsum" > "$management" + cat "$preview" | grep "ipsum" > /dev/null + basic_test "delf_first" 2 test $? -ne 0 + + cleanup_test +} + +test_delf_none() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "delf dolor" > "$management" 2> /dev/null + no=$(cat "$preview" | wc -l) + basic_test "delf_none" 2 test "$no" -eq 2 + + cleanup_test +} + +test_dela_one_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "dela ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "dela_one_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_dela_one_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "dela ipsum" > "$management" + cat "$preview" | grep "ipsum" > /dev/null + basic_test "dela_one_content" 2 test $? -ne 0 + + cleanup_test +} + +test_dela_two_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "adde ipsum" > "$management" + echo "dela ipsum" > "$management" + + # "ipsum" was added twice; only "lorem" and "dolor" must remain. + no=$(cat "$preview" | wc -l) + basic_test "dela_two_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_dela_two_content() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "adde ipsum" > "$management" + echo "dela ipsum" > "$management" + + # Both "ipsum" entries must be gone. + cat "$preview" | grep "ipsum" > /dev/null + basic_test "dela_two_content" 2 test $?
-ne 0 + + cleanup_test +} + +test_dela_all_five_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "dela lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "dela_all_five_number" 2 test "$no" -eq 0 + + cleanup_test +} + +test_dela_all_five_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "dela lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "dela_all_five_content" 2 test $? -ne 0 + + cleanup_test +} + +test_dela_none() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "dela dolor" > "$management" 2> /dev/null + no=$(cat "$preview" | wc -l) + basic_test "dela_none" 2 test "$no" -eq 2 + + cleanup_test +} + +test_mix_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "adde dolor" > "$management" + echo "addf sit" > "$management" + echo "adde sit" > "$management" + echo "addf sit" > "$management" + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + echo "delf dolor" > "$management" + echo "adde dolor" > "$management" + echo "dela sit" > "$management" + echo "delf ipsum" > "$management" + echo "delf ipsum" > "$management" + echo "dela dolor" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "mix_number" 3 test "$no" -eq 1 + + cleanup_test +} + +test_mix_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "adde dolor" > "$management" + echo "addf sit" > "$management" + echo "adde sit" > "$management" + echo "addf sit" > "$management" + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + echo "delf 
dolor" > "$management" + echo "adde dolor" > "$management" + echo "dela sit" > "$management" + echo "delf ipsum" > "$management" + echo "delf ipsum" > "$management" + echo "dela dolor" > "$management" + + # After the sequence above "lorem" must still be listed; assert on the + # grep status (not on $no, which this function never sets). + cat "$preview" | grep "lorem" > /dev/null + basic_test "mix_content" 3 test $? -eq 0 + + cleanup_test +} + +run_tests() +{ + test_module_exists + test_proc_folder_exits + test_preview_exits + test_management_exits + test_preview_is_readable + test_preview_is_not_writable + test_management_is_writable + test_management_is_not_readable + test_addf_no_error + test_addf_print + test_addf_print_once + test_adde_no_error + test_adde_print + test_adde_print_once + test_addf_two_number + test_addf_two_content + test_addf_same_twice_number + test_addf_same_twice_content + test_addf_same_twice_content_number + test_adde_two_number + test_adde_two_content + test_adde_same_twice_number + test_adde_same_twice_content + test_adde_same_twice_content_number + test_addf_front_after_addf + test_addf_front_after_adde + test_addf_front_after_adde_addf + test_addf_front_after_addf_adde + test_adde_end_after_addf + test_adde_end_after_adde + test_adde_end_after_adde_addf + test_adde_end_after_addf_adde + test_delf_no_error + test_delf_removes_front + test_delf_removes_end + test_delf_front_number + test_delf_front_content + test_delf_first + test_delf_none + test_dela_one_number + test_dela_one_content + test_dela_two_number + test_dela_two_content + test_dela_all_five_number + test_dela_all_five_content + test_dela_none + test_mix_number + test_mix_content +} + +run_tests | tee results.txt + +cat results.txt | grep '\[.*\]$' | awk -F '[] /[]+' ' +BEGIN { + sum=0 +} + +{ + sum += $(NF-2); +} + +END { + printf "\n%66s [%02d/90]\n", "Total:", sum; +}' + +rm results.txt diff --git a/tools/labs/templates/api_assignment/kernel-api/Kbuild b/tools/labs/templates/api_assignment/kernel-api/Kbuild new file mode 100644 index 00000000000000..5e45a816841549 --- /dev/null +++
b/tools/labs/templates/api_assignment/kernel-api/Kbuild @@ -0,0 +1 @@ +obj-m = list.o diff --git a/tools/labs/templates/api_assignment/kernel-api/list.c b/tools/labs/templates/api_assignment/kernel-api/list.c new file mode 100644 index 00000000000000..39db4cef953ad2 --- /dev/null +++ b/tools/labs/templates/api_assignment/kernel-api/list.c @@ -0,0 +1,117 @@ +/* + * list.c - Linux kernel list API + * + * TODO 1/0: Fill in name / email + * Author: FirstName LastName + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define PROCFS_MAX_SIZE 1024 + +#define procfs_dir_name "list" +#define procfs_file_read "preview" +#define procfs_file_write "management" + +struct proc_dir_entry *proc_list; +struct proc_dir_entry *proc_list_read; +struct proc_dir_entry *proc_list_write; + +/* TODO 2/0: define your list! */ + +static int list_proc_show(struct seq_file *m, void *v) +{ + /* TODO 3/0: print your list. One element / line. */ + seq_puts(m, "Remove this line\n"); + + return 0; +} + +static int list_read_open(struct inode *inode, struct file *file) +{ + return single_open(file, list_proc_show, NULL); +} + +static int list_write_open(struct inode *inode, struct file *file) +{ + return single_open(file, list_proc_show, NULL); +} + +static ssize_t list_write(struct file *file, const char __user *buffer, + size_t count, loff_t *offs) +{ + char local_buffer[PROCFS_MAX_SIZE]; + unsigned long local_buffer_size = 0; + + local_buffer_size = count; + if (local_buffer_size > PROCFS_MAX_SIZE) + local_buffer_size = PROCFS_MAX_SIZE; + + memset(local_buffer, 0, PROCFS_MAX_SIZE); + if (copy_from_user(local_buffer, buffer, local_buffer_size)) + return -EFAULT; + + /* local_buffer contains your command written in /proc/list/management + * TODO 4/0: parse the command and add/delete elements. 
+ */ + + return local_buffer_size; +} + +static const struct file_operations r_fops = { + .owner = THIS_MODULE, + .open = list_read_open, + .read = seq_read, + .release = single_release, +}; + +static const struct file_operations w_fops = { + .owner = THIS_MODULE, + .open = list_write_open, + .write = list_write, + .release = single_release, +}; + +static int list_init(void) +{ + proc_list = proc_mkdir(procfs_dir_name, NULL); + if (!proc_list) + return -ENOMEM; + + proc_list_read = proc_create(procfs_file_read, 0000, proc_list, + &r_fops); + if (!proc_list_read) + goto proc_list_cleanup; + + proc_list_write = proc_create(procfs_file_write, 0000, proc_list, + &w_fops); + if (!proc_list_write) + goto proc_list_read_cleanup; + + return 0; + +proc_list_read_cleanup: + proc_remove(proc_list_read); +proc_list_cleanup: + proc_remove(proc_list); + return -ENOMEM; +} + +static void list_exit(void) +{ + proc_remove(proc_list); +} + +module_init(list_init); +module_exit(list_exit); + +MODULE_DESCRIPTION("Linux kernel list API"); +/* TODO 5/0: Fill in your name / email address */ +MODULE_AUTHOR("FirstName LastName &2 + fi +} + +max_points=90 + +# Enable/disable exiting when program fails. +EXIT_IF_FAIL=0 + +test_do_fail() +{ + points=$1 + printf "failed [00/%02d]\n" "$max_points" + if test "x$EXIT_IF_FAIL" = "x1"; then + exit 1 + fi +} + +test_do_pass() +{ + points=$1 + printf "passed [%02d/%02d]\n" "$points" "$max_points" +} + +basic_test() +{ + message=$1 + points=$2 + shift 2 + test_command=$@ + + printf "%s" "$message" + + i=0 + limit=$((60 - ${#message})) + while test "$i" -lt "$limit"; do + printf "." + i=$(($i+1)) + done + + $test_command > /dev/null 2>&1 + if test $? -eq 0; then + test_do_pass "$points" + else + test_do_fail "$points" + fi +} + +module="list" +module_file="$module".ko +proc_folder="/proc/list" +preview="$proc_folder/preview" +management="$proc_folder/management" + +init_test() +{ + dmesg -c > /dev/null 2>&1 + insmod "$module_file" + if test $? 
-ne 0; then + echo "Error inserting module." 1>&2 + exit 1 + fi + sleep 1 +} + +cleanup_test() +{ + rmmod "$module" +} + +test_module_exists() +{ + init_test + + lsmod 2> /dev/null | grep -w list > /dev/null 2>&1 + basic_test "module_exists" 1 test "$?" -eq 0 + + cleanup_test +} + +test_proc_folder_exits() +{ + init_test + + basic_test "folder_exists" 1 test -d "$proc_folder" + + cleanup_test +} + +test_preview_exits() +{ + init_test + + basic_test "preview_exists" 1 test -f "$preview" + + cleanup_test +} + +test_management_exits() +{ + init_test + + basic_test "management_exists" 1 test -f "$management" + + cleanup_test +} + +test_preview_is_readable() +{ + init_test + + cat "$preview" > /dev/null 2>&1 + basic_test "preview_is_readable" 1 test $? -eq 0 + + cleanup_test +} + +test_preview_is_not_writable() +{ + init_test + + echo "hello" 2>&1 > "$preview" | grep "Input/output error" > /dev/null + basic_test "preview_is_not_writable" 1 test $? -eq 0 + + cleanup_test +} + +test_management_is_writable() +{ + init_test + + echo "hello" 2>&1 > "$management" | grep "Input/output error" > /dev/null + basic_test "management_is_writable" 1 test $? -ne 0 + + cleanup_test +} + +test_management_is_not_readable() +{ + init_test + + cat "$management" > /dev/null 2>&1 + basic_test "management_is_not_readable" 1 test $? -ne 0 + + cleanup_test +} + +test_addf_no_error() +{ + init_test + + echo "addf lorem" > "$management" 2> /dev/null + basic_test "addf_no_error" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_print() +{ + init_test + + echo "addf lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "addf_print" 2 test $? 
-eq 0 + + cleanup_test +} + +test_addf_print_once() +{ + init_test + + echo "addf lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "addf_print_once" 2 test "$no" -eq 1 + + cleanup_test +} + +test_adde_no_error() +{ + init_test + + echo "adde lorem" > "$management" 2> /dev/null + basic_test "adde_no_error" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_print() +{ + init_test + + echo "adde lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "adde_print" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_print_once() +{ + init_test + + echo "adde lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "adde_print_once" 2 test "$no" -eq 1 + + cleanup_test +} + +test_addf_two_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "addf_two_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_addf_two_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + cat "$preview" | grep "lorem" > /dev/null + ret1=$? + cat "$preview" | grep "ipsum" > /dev/null + ret2=$? + basic_test "addf_two_content" 2 test "$ret1" -eq 0 -a "$ret2" -eq 0 + + cleanup_test +} + +test_addf_same_twice_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "addf_same_twice_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_addf_same_twice_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "addf_same_twice_content" 2 test $? 
-eq 0 + + cleanup_test +} + +test_addf_same_twice_content_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "addf_same_twice_content_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_adde_two_number() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "adde_two_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_adde_two_content() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde ipsum" > "$management" + cat "$preview" | grep "lorem" > /dev/null + ret1=$? + cat "$preview" | grep "ipsum" > /dev/null + ret2=$? + basic_test "adde_two_content" 2 test "$ret1" -eq 0 -a "$ret2" -eq 0 + + cleanup_test +} + +test_adde_same_twice_number() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "adde_same_twice_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_adde_same_twice_content() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "adde_same_twice_content" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_same_twice_content_number() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "adde_same_twice_content_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_addf_front_after_addf() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + + # "ipsum" must be first. + cat "$preview" | head -n 1 | grep "ipsum" > /dev/null + basic_test "addf_front_after_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_front_after_adde() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + + # "ipsum" must be first. 
+ cat "$preview" | head -n 1 | grep "ipsum" > /dev/null + basic_test "addf_front_after_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_front_after_adde_addf() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + + # "dolor" must be first. + cat "$preview" | head -n 1 | grep "dolor" > /dev/null + basic_test "addf_front_after_adde_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_front_after_addf_adde() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "addf dolor" > "$management" + + # "dolor" must be first. + cat "$preview" | head -n 1 | grep "dolor" > /dev/null + basic_test "addf_front_after_addf_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_addf() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + + # "ipsum" must be last. + cat "$preview" | tail -n -1 | grep "ipsum" > /dev/null + basic_test "adde_end_after_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_adde() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde ipsum" > "$management" + + # "ipsum" must be last. + cat "$preview" | tail -n -1 | grep "ipsum" > /dev/null + basic_test "adde_end_after_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_adde_addf() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "adde dolor" > "$management" + + # "dolor" must be last. + cat "$preview" | tail -n -1 | grep "dolor" > /dev/null + basic_test "adde_end_after_adde_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_addf_adde() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "adde dolor" > "$management" + + # "dolor" must be last. + cat "$preview" | tail -n -1 | grep "dolor" > /dev/null + basic_test "adde_end_after_addf_adde" 2 test $?
-eq 0 + + cleanup_test +} + +test_delf_no_error() +{ + init_test + + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + basic_test "delf_no_error" 2 test $? -eq 0 + + cleanup_test +} + +test_delf_removes_front() +{ + init_test + + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "delf_removes_front" 2 test "$no" -eq 0 + + cleanup_test +} + +test_delf_removes_end() +{ + init_test + + echo "adde lorem" > "$management" + echo "delf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "delf_removes_end" 2 test "$no" -eq 0 + + cleanup_test +} + +test_delf_front_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "delf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "delf_front_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_delf_front_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "delf lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "delf_front_content" 2 test $? -ne 0 + + cleanup_test +} + +test_delf_first() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "delf ipsum" > "$management" + cat "$preview" | grep "ipsum" > /dev/null + basic_test "delf_first" 2 test $? 
-ne 0 + + cleanup_test +} + +test_delf_none() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "delf dolor" > "$management" 2> /dev/null + no=$(cat "$preview" | wc -l) + basic_test "delf_none" 2 test "$no" -eq 2 + + cleanup_test +} + +test_dela_one_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "dela ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "dela_one_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_dela_one_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "dela ipsum" > "$management" + cat "$preview" | grep "ipsum" > /dev/null + basic_test "dela_one_content" 2 test $? -ne 0 + + cleanup_test +} + +test_dela_two_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "adde ipsum" > "$management" + echo "dela ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "dela_two_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_dela_two_content() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "adde ipsum" > "$management" + echo "dela ipsum" > "$management" + cat "$preview" | grep "ipsum" > /dev/null + basic_test "dela_two_content" 2 test $?
-ne 0 + + cleanup_test +} + +test_dela_all_five_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "dela lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "dela_all_five_number" 2 test "$no" -eq 0 + + cleanup_test +} + +test_dela_all_five_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "dela lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "dela_all_five_content" 2 test $? -ne 0 + + cleanup_test +} + +test_dela_none() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "dela dolor" > "$management" 2> /dev/null + no=$(cat "$preview" | wc -l) + basic_test "dela_none" 2 test "$no" -eq 2 + + cleanup_test +} + +test_mix_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "adde dolor" > "$management" + echo "addf sit" > "$management" + echo "adde sit" > "$management" + echo "addf sit" > "$management" + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + echo "delf dolor" > "$management" + echo "adde dolor" > "$management" + echo "dela sit" > "$management" + echo "delf ipsum" > "$management" + echo "delf ipsum" > "$management" + echo "dela dolor" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "mix_number" 3 test "$no" -eq 1 + + cleanup_test +} + +test_mix_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "adde dolor" > "$management" + echo "addf sit" > "$management" + echo "adde sit" > "$management" + echo "addf sit" > "$management" + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + echo "delf 
dolor" > "$management" + echo "adde dolor" > "$management" + echo "dela sit" > "$management" + echo "delf ipsum" > "$management" + echo "delf ipsum" > "$management" + echo "dela dolor" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "mix_content" 3 test $? -eq 0 + + cleanup_test +} + +run_tests() +{ + test_module_exists + test_proc_folder_exits + test_preview_exits + test_management_exits + test_preview_is_readable + test_preview_is_not_writable + test_management_is_writable + test_management_is_not_readable + test_addf_no_error + test_addf_print + test_addf_print_once + test_adde_no_error + test_adde_print + test_adde_print_once + test_addf_two_number + test_addf_two_content + test_addf_same_twice_number + test_addf_same_twice_content + test_addf_same_twice_content_number + test_adde_two_number + test_adde_two_content + test_adde_same_twice_number + test_adde_same_twice_content + test_adde_same_twice_content_number + test_addf_front_after_addf + test_addf_front_after_adde + test_addf_front_after_adde_addf + test_addf_front_after_addf_adde + test_adde_end_after_addf + test_adde_end_after_adde + test_adde_end_after_adde_addf + test_adde_end_after_addf_adde + test_delf_no_error + test_delf_removes_front + test_delf_removes_end + test_delf_front_number + test_delf_front_content + test_delf_first + test_delf_none + test_dela_one_number + test_dela_one_content + test_dela_two_number + test_dela_two_content + test_dela_all_five_number + test_dela_all_five_content + test_dela_none + test_mix_number + test_mix_content +} + +run_tests | tee results.txt + +cat results.txt | grep '\[.*\]$' | awk -F '[] /[]+' ' +BEGIN { + sum=0 +} + +{ + sum += $(NF-2); +} + +END { + printf "\n%66s [%02d/90]\n", "Total:", sum; +}' + +rm results.txt diff --git a/tools/labs/templates/assignments/0-list/list.c b/tools/labs/templates/assignments/0-list/list.c new file mode 100644 index 00000000000000..c12c3e3efbbb60 --- /dev/null +++
b/tools/labs/templates/assignments/0-list/list.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * list.c - Linux kernel list API + * + * TODO 1/0: Fill in name / email + * Author: FirstName LastName + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define PROCFS_MAX_SIZE 512 + +#define procfs_dir_name "list" +#define procfs_file_read "preview" +#define procfs_file_write "management" + +struct proc_dir_entry *proc_list; +struct proc_dir_entry *proc_list_read; +struct proc_dir_entry *proc_list_write; + +/* TODO 2/0: define your list! */ + +static int list_proc_show(struct seq_file *m, void *v) +{ + /* TODO 3/0: print your list. One element / line. */ + seq_puts(m, "Remove this line\n"); + + return 0; +} + +static int list_read_open(struct inode *inode, struct file *file) +{ + return single_open(file, list_proc_show, NULL); +} + +static int list_write_open(struct inode *inode, struct file *file) +{ + return single_open(file, list_proc_show, NULL); +} + +static ssize_t list_write(struct file *file, const char __user *buffer, + size_t count, loff_t *offs) +{ + char local_buffer[PROCFS_MAX_SIZE]; + unsigned long local_buffer_size = 0; + + local_buffer_size = count; + if (local_buffer_size > PROCFS_MAX_SIZE) + local_buffer_size = PROCFS_MAX_SIZE; + + memset(local_buffer, 0, PROCFS_MAX_SIZE); + if (copy_from_user(local_buffer, buffer, local_buffer_size)) + return -EFAULT; + + /* local_buffer contains your command written in /proc/list/management + * TODO 4/0: parse the command and add/delete elements. 
+ */ + + return local_buffer_size; +} + +static const struct file_operations r_fops = { + .owner = THIS_MODULE, + .open = list_read_open, + .read = seq_read, + .release = single_release, +}; + +static const struct file_operations w_fops = { + .owner = THIS_MODULE, + .open = list_write_open, + .write = list_write, + .release = single_release, +}; + +static int list_init(void) +{ + proc_list = proc_mkdir(procfs_dir_name, NULL); + if (!proc_list) + return -ENOMEM; + + proc_list_read = proc_create(procfs_file_read, 0000, proc_list, + &r_fops); + if (!proc_list_read) + goto proc_list_cleanup; + + proc_list_write = proc_create(procfs_file_write, 0000, proc_list, + &w_fops); + if (!proc_list_write) + goto proc_list_read_cleanup; + + return 0; + +proc_list_read_cleanup: + proc_remove(proc_list_read); +proc_list_cleanup: + proc_remove(proc_list); + return -ENOMEM; +} + +static void list_exit(void) +{ + proc_remove(proc_list); +} + +module_init(list_init); +module_exit(list_exit); + +MODULE_DESCRIPTION("Linux kernel list API"); +/* TODO 5/0: Fill in your name / email address */ +MODULE_AUTHOR("FirstName LastName "); +MODULE_LICENSE("GPL v2"); diff --git a/tools/labs/templates/assignments/00-hello/Kbuild b/tools/labs/templates/assignments/00-hello/Kbuild new file mode 100644 index 00000000000000..9de37409e9e7c6 --- /dev/null +++ b/tools/labs/templates/assignments/00-hello/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g -m32 + +obj-m = hello-world.o diff --git a/tools/labs/templates/assignments/00-hello/checker/_checker b/tools/labs/templates/assignments/00-hello/checker/_checker new file mode 100755 index 00000000000000..4b8a0869da2adf --- /dev/null +++ b/tools/labs/templates/assignments/00-hello/checker/_checker @@ -0,0 +1,12 @@ +#!/bin/sh + +/bin/dmesg -c > /dev/null 2>&1 +/sbin/rmmod hello-world > /dev/null 2>&1 +/sbin/insmod hello-world.ko +/bin/dmesg | grep 'Hello, World!' > /dev/null 2>&1 +if test $? -eq 0; then + echo "Test PASSED." +else + echo "Test FAILED." 
+fi +/sbin/rmmod hello-world > /dev/null 2>&1 diff --git a/tools/labs/templates/assignments/00-hello/hello-world.c b/tools/labs/templates/assignments/00-hello/hello-world.c new file mode 100644 index 00000000000000..c8c067b1ddf0ad --- /dev/null +++ b/tools/labs/templates/assignments/00-hello/hello-world.c @@ -0,0 +1,23 @@ +#include +#include +#include + +MODULE_DESCRIPTION("Hello World"); +MODULE_AUTHOR("Psoru Lesfo Rever"); +MODULE_LICENSE("GPL"); + + +static int hello_init(void) +{ + /* TODO: Print "Hello, World!" */ + pr_info("Hello, World!\n"); + + return 0; +} + +static void hello_exit(void) +{ +} + +module_init(hello_init); +module_exit(hello_exit); diff --git a/tools/labs/templates/assignments/1-tracer/checker/Makefile b/tools/labs/templates/assignments/1-tracer/checker/Makefile new file mode 100644 index 00000000000000..ed86666ec4df3c --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/Makefile @@ -0,0 +1,19 @@ +LDFLAGS = -m32 + +.PHONY: all clean + +all: tracer_test + +tracer_test: tracer_test.o + +tracer_test.o: _test/tracer_test.c + make -C _test + ln -sfn _test/tracer_test.o $@ + +run: all + ./_checker + +clean: + -rm -f *~ + -rm -f tracer_test.o tracer_test + -make -C _test clean diff --git a/tools/labs/templates/assignments/1-tracer/checker/README b/tools/labs/templates/assignments/1-tracer/checker/README new file mode 100644 index 00000000000000..75c90417a94fb6 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/README @@ -0,0 +1,113 @@ += KPROBE BASED TRACER TEST SUITE == + +Test suite for kprobe based tracer + +== FILES == + +README + * this file + +Makefile + * Makefile for automating the build process + +_checker + * script to run all tests defined in _test/tracer_test.c + +_test/Makefile + * test suite internal Makefile (creates necessary object files) + +_test/tracer_test.c + * test suite for Kprobe Based Tracer + +_test/tracer_test.h + * test suite header file + +_test/tracer.h + * kprobe tracer header file 
(macros and structures) + +_test/test.h + * useful macros for testing + +_test/debug.h + * debugging macros + +_test/util.h + * useful macros for generic use (error processing) + +_helper/* + * helper kernel module for simulating kprobed ops + +== BUILDING == + +Use the linux-kernel-labs build infrastructure, i.e. run + + make build + +in the tools/labs/ folder. + +If you want to do things by hand, you have to use the Makefile in the +current folder to build the executable: + + make + +and the kernel module-specific Makefile command to build the kernel module: + +make -C $(KDIR) M=$(KDIR)/tools/labs/skels/assignments/1-tracer/checker/_helper/ ARCH=x86 modules + +== RUNNING == + +Use the linux-kernel-labs run infrastructure, i.e. run + + make copy + +This copies the skel/ subfolder into the future root filesystem image for the +QEMU/KVM virtual machine. + +If you want to do things by hand, copy your tracer.ko module and _checker, +tracer_test and tracer_helper.ko to the fsimg/root directory on your QEMU/KVM +virtual machine. + +In order to run the test suite you can either use the _checker +script or run the tracer_test executable. + +The _checker script runs all tests and computes the assignment grade: + + ./_checker + +In order to run a specific test pass the test number (1 .. 10) to the +tracer_test executable. + + ./tracer_test 5 + +== TESTS == + +Tests are basically unit tests. A single function in the test_fun_array (see +tracer_test.c) is called each time the tracer_test executable is invoked, +testing a single functionality (and assuming previous tests have been run and +passed). + +The EXIT_IF_FAIL macro (see test.h) is unnecessary since after each test, the +program completes. + +Each test function follows the unit test pattern: initialization, action, +evaluation. The test macro (see test.h) is invoked at the end of each test +for evaluating and grading the test.
+ +== DEBUGGING == + +The debug.h header file consists of several macros useful for debugging +(dprintf, dlog). There are multiple uses of these macros throughout the above +files. + +In order to turn debug messages on, you must define the DEBUG macro, either in +a header file, or, I suggest, in the Makefile. The LOG_LEVEL macro limits the +log message types that are to be printed, by default LOG_WARNING (see enum in +debug.h). You may redefine it in a header file or in the Makefile. + +Rapid enabling of debug messages is achieved by uncommenting the CPPFLAGS +line in the Makefile. It turns on debugging and enables all log messages +(LOG_DEBUG). + +== OTHER == + +srand48() and drand48() are used for generating random numbers. diff --git a/tools/labs/templates/assignments/1-tracer/checker/_checker b/tools/labs/templates/assignments/1-tracer/checker/_checker new file mode 100755 index 00000000000000..f5a4a7da1b5d26 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_checker @@ -0,0 +1,24 @@ +#!/bin/sh + +first_test=1 +last_test=10 +executable=tracer_test + +for i in $(seq $first_test $last_test); do + ./"$executable" $i +done | tee results.txt + +cat results.txt | grep '\[.*\]$' | awk -F '[] /[]+' ' +BEGIN { + sum=0 +} + +{ + sum += $2; +} + +END { + printf "\n%66s [%03d/100]\n", "Total:", sum; +}' + +rm -f results.txt diff --git a/tools/labs/templates/assignments/1-tracer/checker/_helper/Kbuild b/tools/labs/templates/assignments/1-tracer/checker/_helper/Kbuild new file mode 100644 index 00000000000000..7a7b493cfd9a0f --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_helper/Kbuild @@ -0,0 +1,4 @@ +EXTRA_CFLAGS = -Wall -g + +obj-m = tracer_helper.o + diff --git a/tools/labs/templates/assignments/1-tracer/checker/_helper/helper.h b/tools/labs/templates/assignments/1-tracer/checker/_helper/helper.h new file mode 100644 index 00000000000000..3cb30e03c9dcb9 --- /dev/null +++
b/tools/labs/templates/assignments/1-tracer/checker/_helper/helper.h @@ -0,0 +1,35 @@ +#ifndef _HELPER__ +#define _HELPER__ + +#include + +#define NAMESIZE 64 +#define MCOUNT 128 + +#define PREPARE_TEST _IOW('t', 19, unsigned int) +#define START_TEST _IOW('t', 20, unsigned int) +#define STOP_TEST _IOW('t', 21, unsigned int) + +/*XXX match test_params with tracers_stats + * perhaps use the same struct + */ +struct test_params { + pid_t pid; + char thread_name[NAMESIZE]; + int idx; /* index for multi-kthreaded test */ + /* + * kcalls: 5 + * alloc : [1024] [8] [128] [10] [128] + * free : [0] [0] [1] [0] [1] + */ + int kcalls; /* number of kmalloc calls */ + int alloc[MCOUNT]; /* sizes of kmalloc allocations */ + int free[MCOUNT]; /* intmap for which allocations to free */ + int sched; + int up; + int down; + int lock; + int unlock; +}; + +#endif diff --git a/tools/labs/templates/assignments/1-tracer/checker/_helper/tracer_helper.c b/tools/labs/templates/assignments/1-tracer/checker/_helper/tracer_helper.c new file mode 100644 index 00000000000000..1c93d230c45511 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_helper/tracer_helper.c @@ -0,0 +1,201 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "helper.h" + + +#define HELPER_MINOR 30 + +/* number of paralel kernel threads */ +static wait_queue_head_t wq[MCOUNT]; + +/*XXX: this looks like a poor design, please refactor */ +static int thread_prepared[MCOUNT] = {0, }; +static int thread_running[MCOUNT] = {0, }; +static int thread_should_stop[MCOUNT] = {0, }; +static struct task_struct *t[MCOUNT] = {NULL, }; + +static struct test_params tp[MCOUNT]; +static int tcount; + +void do_work(void) +{ + int i, j; + int a = 0; + + for (i = 0; i < 1000; i++) + for (j = 0; j < 1000; j++) + a = i * j; +} + +int thread_fn(void *data) +{ + int i; + + void *k_addr[MCOUNT]; + struct semaphore 
sem; + struct mutex lock; + + struct test_params *tp; + + tp = (struct test_params *)data; + + thread_prepared[tp->idx] = 1; + wake_up_interruptible(&wq[tp->idx]); + + wait_event_interruptible(wq[tp->idx], thread_running[tp->idx] == 1); + + for (i = 0; i < tp->kcalls; i++) + k_addr[i] = kmalloc(tp->alloc[i], GFP_KERNEL); + + /*XXX: do proper cleanup, avoid memory leaks */ + for (i = 0; i < tp->kcalls; i++) + if (tp->free[i] && k_addr[i]) + kfree(k_addr[i]); + + for (i = 0; i < tp->sched; i++) + schedule(); + + /* ***: use tp->down for down_interruptible */ + sema_init(&sem, 1); + for (i = 0; i < tp->up; i++) { + up(&sem); + do_work(); + down_interruptible(&sem); + } + /* ***: use tp->unlock for mutex_unlock */ + mutex_init(&lock); + for (i = 0; i < tp->lock; i++) { + mutex_lock(&lock); + do_work(); + mutex_unlock(&lock); + } + + wait_event_interruptible(wq[tp->idx], thread_should_stop[tp->idx] == 1); + + /* reset state machine */ + thread_prepared[tp->idx] = 0; + thread_running[tp->idx] = 0; + thread_should_stop[tp->idx] = 0; + + return 0; +} +static int helper_open(struct inode *inode, struct file *file) +{ +#ifdef DEBUG + pr_info("tracer-helper: open\n"); +#endif + return 0; +} + +static int helper_release(struct inode *inode, struct file *file) +{ +#ifdef DEBUG + pr_info("tracer-helper: close\n"); +#endif + return 0; +} + +static long helper_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + int ret = 0; + + switch (cmd) { + case PREPARE_TEST: + if (copy_from_user(&tp[tcount], (struct test_params *)arg, + sizeof(tp[tcount]))) { + pr_info("Error copy from user\n"); + return -EFAULT; + } + t[tp[tcount].idx] = kthread_run(thread_fn, &tp[tcount], "%s", + tp[tcount].thread_name); + if (IS_ERR(t[tp[tcount].idx])) { + pr_info("Could not create thread!\n"); + return PTR_ERR(t[tp[tcount].idx]); + } + + ret = t[tp[tcount].idx]->pid; + wait_event_interruptible(wq[tp[tcount].idx], + thread_prepared[tp[tcount].idx] == 1); + tcount++; + break; + case START_TEST: +#if 0 + pr_info("%s:
start test for idx %lu\n", __func__, arg); +#endif + thread_running[arg] = 1; + wake_up_interruptible(&wq[arg]); + break; + case STOP_TEST: +#if 0 + pr_info("%s: stop test for idx %lu\n", __func__, arg); +#endif + thread_should_stop[arg] = 1; + wake_up_interruptible(&wq[arg]); + kthread_stop(t[arg]); + break; + default: + break; + } + + return ret; +} + +static const struct file_operations tracer_fops = { + .open = helper_open, + .unlocked_ioctl = helper_ioctl, + .release = helper_release, +}; + +static struct miscdevice helper_dev = { + .minor = HELPER_MINOR, + .name = "helper", + .fops = &tracer_fops, +}; + +static int __init tracer_helper_init(void) +{ + int rc, i; + + rc = misc_register(&helper_dev); + if (rc < 0) { + pr_err("misc_register: fail\n"); + return rc; + } + + for (i = 0; i < MCOUNT; i++) + init_waitqueue_head(&wq[i]); +#ifdef DEBUG + pr_info("tracer-helper: init\n"); +#endif + return 0; +} + +static void __exit tracer_helper_exit(void) +{ + misc_deregister(&helper_dev); +#ifdef DEBUG + pr_info("tracer-helper: exit\n"); +#endif +} + +MODULE_AUTHOR("Daniel Baluta"); +MODULE_LICENSE("GPL"); + +module_init(tracer_helper_init); +module_exit(tracer_helper_exit); diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/Makefile b/tools/labs/templates/assignments/1-tracer/checker/_test/Makefile new file mode 100644 index 00000000000000..00160bc9b3647c --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/Makefile @@ -0,0 +1,10 @@ +#CPPFLAGS = -DDEBUG -DLOG_LEVEL=LOG_DEBUG +CFLAGS = -Wall -g -m32 + +.PHONY: all clean + +all: tracer_test.o + +clean: + -rm -f *~ + -rm -f tracer_test.o diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/debug.h b/tools/labs/templates/assignments/1-tracer/checker/_test/debug.h new file mode 100644 index 00000000000000..debdeccf492038 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/debug.h @@ -0,0 +1,77 @@ +/* + * debugging macros + * heavily inspired 
by previous work and Internet resources + * + * uses C99 variadic macros + * uses non-standard usage of the token-paste operator (##) for + * removing the comma symbol (,) when not followed by a token + * uses non-standard __FUNCTION__ macro (MSVC doesn't support __func__) + * tested on gcc 4.4.5 and Visual Studio 2008 (9.0), compiler version 15.00 + * + * 2011, Razvan Deaconescu, razvan.deaconescu@cs.pub.ro + */ + +#ifndef DEBUG_H_ +#define DEBUG_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* log levels */ +enum { + LOG_EMERG = 1, + LOG_ALERT, + LOG_CRIT, + LOG_ERR, + LOG_WARNING, + LOG_NOTICE, + LOG_INFO, + LOG_DEBUG +}; + +/* + * initialize default loglevel (for dlog) + * may be redefined in the including code + */ + +#ifndef LOG_LEVEL +#define LOG_LEVEL LOG_WARNING +#endif + +/* + * define DEBUG macro as a compiler option: + * -DDEBUG for GCC + * /DDEBUG for MSVC + */ + +#if defined DEBUG +#define dprintf(format, ...) \ + fprintf(stderr, " [%s(), %s:%u] " format, \ + __func__, __FILE__, __LINE__, \ + ##__VA_ARGS__) +#else +#define dprintf(format, ...) \ + do { \ + } while (0) +#endif + +#if defined DEBUG +#define dlog(level, format, ...) \ + do { \ + if (level <= LOG_LEVEL) \ + dprintf(format, ##__VA_ARGS__); \ + } while (0) +#else +#define dlog(level, format, ...) 
\ + do { \ + } while (0) +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/helper.h b/tools/labs/templates/assignments/1-tracer/checker/_test/helper.h new file mode 100644 index 00000000000000..3cb30e03c9dcb9 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/helper.h @@ -0,0 +1,35 @@ +#ifndef _HELPER__ +#define _HELPER__ + +#include + +#define NAMESIZE 64 +#define MCOUNT 128 + +#define PREPARE_TEST _IOW('t', 19, unsigned int) +#define START_TEST _IOW('t', 20, unsigned int) +#define STOP_TEST _IOW('t', 21, unsigned int) + +/*XXX match test_params with tracers_stats + * perhaps use the same struct + */ +struct test_params { + pid_t pid; + char thread_name[NAMESIZE]; + int idx; /* index for multi-kthreaded test */ + /* + * kcalls: 5 + * alloc : [1024] [8] [128] [10] [128] + * free : [0] [0] [1] [0] [1] + */ + int kcalls; /* number of kmalloc calls */ + int alloc[MCOUNT]; /* sizes of kmalloc allocations */ + int free[MCOUNT]; /* intmap for which allocations to free */ + int sched; + int up; + int down; + int lock; + int unlock; +}; + +#endif diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/test.h b/tools/labs/templates/assignments/1-tracer/checker/_test/test.h new file mode 100644 index 00000000000000..49ba56f0eeb525 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/test.h @@ -0,0 +1,63 @@ +/* + * generic test suite + * + * test macros and headers + */ + +#ifndef TEST_H_ +#define TEST_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* to be defined by calling program */ +extern int max_points; + +/* + * uncommend EXIT_IF_FAIL macro in order to stop test execution + * at first failed test + */ + +/*#define EXIT_IF_FAIL 1*/ + +#if defined(EXIT_IF_FAIL) +#define test_do_fail(points) \ + do { \ + printf("failed\n"); \ + exit(EXIT_FAILURE); \ + } while (0) +#else +#define test_do_fail(points) \ + printf("failed [ 
0/%3d]\n", max_points) +#endif + +#define test_do_pass(points) \ + printf("passed [%3d/%3d]\n", points, max_points) + +#define test(message, test, points) \ + do { \ + size_t _i; \ + int t = (test); \ + \ + printf("%s", message); \ + fflush(stdout); \ + \ + for (_i = 0; _i < 60 - strlen(message); _i++) \ + putchar('.'); \ + \ + if (!t) \ + test_do_fail(points); \ + else \ + test_do_pass(points); \ + \ + fflush(stdout); \ + } while (0) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/tracer.h b/tools/labs/templates/assignments/1-tracer/checker/_test/tracer.h new file mode 100644 index 00000000000000..7a55257b770aaf --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/tracer.h @@ -0,0 +1,21 @@ +/* + * SO2 kprobe based tracer header file + * + * this is shared with user space + */ + +#ifndef TRACER_H__ +#define TRACER_H__ 1 + +#include +#ifndef __KERNEL__ +#include +#endif /* __KERNEL__ */ + +#define TRACER_DEV_MINOR 42 +#define TRACER_DEV_NAME "tracer" + +#define TRACER_ADD_PROCESS _IOW(_IOC_WRITE, 42, pid_t) +#define TRACER_REMOVE_PROCESS _IOW(_IOC_WRITE, 43, pid_t) + +#endif /* TRACER_H_ */ diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/tracer_test.c b/tools/labs/templates/assignments/1-tracer/checker/_test/tracer_test.c new file mode 100644 index 00000000000000..175ebfa07caff5 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/tracer_test.c @@ -0,0 +1,779 @@ +/* + * SO2 Kprobe based tracer - test suite + * + * Authors: + * Daniel Baluta + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "test.h" +#include "debug.h" +#include "util.h" + +#include "tracer.h" +#include "tracer_test.h" +#include "helper.h" + +/* use this to enable stats debugging */ +#if 0 +#define DEBUG +#endif + +#define MSECS 1000 + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +struct 
tracer_stats { + pid_t tr_pid; + int tr_alloc; + int tr_free; + int tr_mem; + int tr_mem_free; + int tr_sched; + int tr_up; + int tr_down; + int tr_lock; + int tr_unlock; +}; + +struct test_case { + char test_name[NAMESIZE]; + int score; + struct test_params test_params; +}; + +struct tracer_stats ts[MCOUNT]; + +struct test_case tc[] = { + /* 0 */ + { + .test_name = "test_simple_kmalloc", + .test_params = { + .thread_name = "xthread-0", + .kcalls = 1, + .alloc = {1024, }, + .idx = 0, + }, + .score = 5, + }, + /* 1 */ + { + .test_name = "test_simple_kfree", + .test_params = { + .thread_name = "xthread-1", + .kcalls = 1, + .alloc = {4096, }, + .free = {1, }, + .idx = 1, + }, + .score = 5, + }, + /* 2 */ + { + .test_name = "test_simple_sched", + .test_params = { + .thread_name = "xthread-2", + .sched = 1, + .idx = 2, + }, + .score = 4, + }, + /* 3 */ + { + .test_name = "test_simple_up_down", + .test_params = { + .thread_name = "xthread-3", + .up = 1, + .down = 1, + .idx = 3, + }, + .score = 4, + }, + /* 4 */ + { + .test_name = "test_simple_lock_unlock", + .test_params = { + .thread_name = "xthread-4", + .lock = 1, + .unlock = 1, + .idx = 4, + }, + .score = 4, + }, + + /* 5 */ + { + .test_name = "test_medium_kmalloc", + .test_params = { + .thread_name = "xthread-5", + .kcalls = 16, + .alloc = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + .idx = 5, + }, + .score = 5, + }, + + /* 6 */ + { + .test_name = "test_medium_free", + .test_params = { + .thread_name = "xthread-6", + .kcalls = 12, + .alloc = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048}, + .free = {0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1}, + .idx = 6, + }, + .score = 5, + }, + + /* 7 */ + { + .test_name = "test_medium_sched", + .test_params = { + .thread_name = "xthread-7", + .sched = 30, + .idx = 7, + }, + .score = 5, + }, + + /* 8 */ + { + .test_name = "test_medium_up_down", + .test_params = { + .thread_name = "xthread-8", + .up = 32, + .down = 32, + .idx = 8, + }, + .score = 4, + }, + /* 9 */ + { + 
.test_name = "test_medium_lock_unlock", + .test_params = { + .thread_name = "xthread-9", + .lock = 32, + .unlock = 32, + .idx = 9, + }, + .score = 4, + }, + /* 10 */ + { + .test_name = "test_medium_combined", + .test_params = { + .thread_name = "xthread-9", + .kcalls = 9, + .alloc = {1024, 512, 128, 64, 32, 64, 128, 512, 1024}, + .free = {1, 1, 1, 1, 1, 1, 1, 1, 1}, + .lock = 8, + .unlock = 8, + .up = 12, + .down = 12, + .idx = 10, + }, + .score = 5, + }, +}; + +/* declared in test.h; used for printing information in test macro */ +int max_points = 100; + +/* + * Do initialization for tracer test functions. + */ + +static void init_test(void) +{ + int rc; + + rc = system("insmod " MODULE_FILENAME); + DIE(rc != 0, "init_test"); +} + +static void init_test2(int *fd) +{ + int rc; + + system("insmod " MODULE_FILENAME); + + rc = open("/dev/tracer", O_RDONLY); + DIE(rc < 0, "init_test2"); + + *fd = rc; +} + +/* + * Do cleanup for tracer test functions. + */ + +static void cleanup_test(void) +{ + system("rmmod " MODULE_NAME); +} + +static void cleanup_test2(int fd) +{ + close(fd); + + system("rmmod " MODULE_NAME); +} + +/* + * Do initialization for tracer helper test module + */ +static void init_helper(int *fd) +{ + int rc; + + system("insmod " HELPER_MODULE_FILENAME); + + rc = open("/dev/helper", O_RDONLY); + DIE(rc < 0, "init helper"); + + *fd = rc; +} + +/* + * Do cleanup for tracer helper test module + */ + +static void cleanup_helper(int fd) +{ + close(fd); + + system("rmmod " HELPER_MODULE_NAME); +} + + +/* + * Check for successful module insertion and removal from the kernel. 
+ */ + +static void test_insmod_rmmod(void) +{ + int rc; + + rc = system("insmod " MODULE_FILENAME); + test("test_insmod", rc == 0, 1); + + rc = system("rmmod " MODULE_NAME); + test("test_rmmod", rc == 0, 1); + + rc = system("insmod " MODULE_FILENAME); + test(__func__, rc == 0, 1); + + system("rmmod " MODULE_NAME); +} + +static void test_open_dev_tracer(void) +{ + int rc; + char dev_name[64]; + + init_test(); + snprintf(dev_name, 63, "/dev/%s", TRACER_DEV_NAME); + + rc = open(dev_name, O_RDONLY); + test(__func__, rc >= 0, 1); + close(rc); + + cleanup_test(); +} + +static void test_dev_minor_major(void) +{ + int rc; + struct stat buf; + + init_test(); + + rc = lstat("/dev/tracer", &buf); + if (rc < 0) { + perror("lstat"); + exit(-1); + } + test(__func__, major(buf.st_rdev) == 10 && + minor(buf.st_rdev) == 42, 1); + + cleanup_test(); +} + +/* + * Check for proc entry for kprobe stats + */ + +static void test_proc_entry_exists_after_insmod(void) +{ + int rc; + + init_test(); + + rc = system("ls /proc/tracer > /dev/null 2>&1"); + test(__func__, rc == 0, 2); + + cleanup_test(); +} + +static void test_proc_entry_inexistent_after_rmmod(void) +{ + int rc; + + init_test(); + cleanup_test(); + + rc = system("ls /proc/tracer > /dev/null 2>&1"); + test(__func__, rc != 0, 2); +} + +int tracer_proc_check_values(struct tracer_stats *st, + struct test_case *tc, int no) +{ + int idx, idz, idk;/* really? 
*/ + int a, b, c, d, e, f, g, h, i, j;/* no, no */ + int total_mem = 0; + int total_free = 0; + int no_free = 0; + int ok = 0; + /* this is embarrassing - O(n^2) - stats are not sorted by pid */ + + for (idx = 0; idx < no; idx++) { + ok = 0; + for (idk = 0; idk < no; idk++) { + if (st[idk].tr_pid != tc[idx].test_params.pid) + continue; + ok = 1; + total_mem = 0; + total_free = 0; + no_free = 0; + + for (idz = 0; idz < tc[idx].test_params.kcalls; idz++) { + total_mem += tc[idx].test_params.alloc[idz]; + total_free += tc[idx].test_params.free[idz] * + tc[idx].test_params.alloc[idz]; + if (tc[idx].test_params.free[idz]) + no_free++; + } + + a = (st[idk].tr_pid == tc[idx].test_params.pid); + b = (st[idk].tr_alloc == tc[idx].test_params.kcalls); + dprintf("tr_alloc (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_alloc, + tc[idx].test_params.kcalls); + + c = (st[idk].tr_free == no_free); + dprintf("tr_free (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_free, no_free); + + d = (st[idk].tr_mem == total_mem); + dprintf("tr_mem (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_mem, total_mem); + + e = (st[idk].tr_mem_free == total_free); + dprintf("tr_mem_free (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_mem_free, + total_free); + + f = (st[idk].tr_sched >= tc[idx].test_params.sched); + dprintf("tr_sched (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_sched, + tc[idx].test_params.sched); + + g = (st[idk].tr_up == tc[idx].test_params.up); + dprintf("tr_up (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_up, + tc[idx].test_params.up); + + h = (st[idk].tr_down == tc[idx].test_params.down); + dprintf("tr_down (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_down, + tc[idx].test_params.down); + + i = (st[idk].tr_lock == tc[idx].test_params.lock); + dprintf("tr_lock (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_lock, + tc[idx].test_params.lock); + + j = (st[idk].tr_unlock == 
tc[idx].test_params.unlock); + dprintf("tr_unlock (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_unlock, + tc[idx].test_params.unlock); + + if (!a || !b || !c || !d || + !e || !f || !g || !h || !i || !j) + return 0; + } + /* + * ok is reset for every test case; without this check only + * the last test case would have to be present in the stats. + */ + if (!ok) + return 0; + } + return ok; +} + +static void tracer_proc_read_values(struct tracer_stats *st, int no) +{ + char buffer[256]; + FILE *f; + int i; + + f = fopen("/proc/tracer", "rt"); + DIE(f == NULL, "tracer_proc_read_value"); + + /* skip header line */ + fgets(buffer, 256, f); + + for (i = 0; i < no; i++) { + fscanf(f, "%d %d %d %d %d %d %d %d %d %d", + &st[i].tr_pid, &st[i].tr_alloc, &st[i].tr_free, + &st[i].tr_mem, &st[i].tr_mem_free, &st[i].tr_sched, + &st[i].tr_up, &st[i].tr_down, &st[i].tr_lock, + &st[i].tr_unlock); + } + fclose(f); +} + +/* + * creates a process prepared to run with @tp params + * returns the pid of the newly created process + */ +void prepare_helper(int fd, struct test_params *tp, pid_t *pid) +{ + int rc; + + rc = ioctl(fd, PREPARE_TEST, tp); + DIE(rc < 0, "prepare helper"); + *pid = rc; +} + +void start_helper(int fd, int idx) +{ + int rc; + + rc = ioctl(fd, START_TEST, idx); + DIE(rc < 0, "start helper"); +} + +void stop_helper(int fd, int idx) +{ + int rc; + + rc = ioctl(fd, STOP_TEST, idx); + DIE(rc < 0, "stop helper"); +} + +/* XXX: we should really check the return codes */ +void trace_process(int fd, pid_t pid) +{ + int rc; + + rc = ioctl(fd, TRACER_ADD_PROCESS, pid); + DIE(rc < 0, "trace_process"); +} + +void untrace_process(int fd, pid_t pid) +{ + int rc; + + rc = ioctl(fd, TRACER_REMOVE_PROCESS, pid); + DIE(rc < 0, "untrace process"); +} +static void test_single(void) +{ + int fd, fdh, i, rc; + + init_test2(&fd); + init_helper(&fdh); + + for (i = 0; i < 11; i++) { + + prepare_helper(fdh, &tc[i].test_params, &tc[i].test_params.pid); + usleep(400 * MSECS); + + trace_process(fd, tc[i].test_params.pid); + usleep(400 * MSECS); + + start_helper(fdh, tc[i].test_params.idx); + usleep(400 * MSECS); + + /* check proc for 
schedule stats */ + tracer_proc_read_values(&ts[0], 1); + rc = tracer_proc_check_values(&ts[0], &tc[i], 1); + + memset(&ts[0], 0, sizeof(struct tracer_stats)); + + untrace_process(fd, tc[i].test_params.pid); + usleep(400 * MSECS); + stop_helper(fdh, tc[i].test_params.idx); + + usleep(400 * MSECS); + test(tc[i].test_name, rc == 1, tc[i].score); + } + + cleanup_helper(fdh); + cleanup_test2(fd); +} + + +static void test_multiple_zero_stats(void) +{ + int fd, fdh, i, rc; + struct test_case mz[16]; + struct tracer_stats zstats[16]; /* zstats, mz, wtf? */ + + for (i = 0; i < 16; i++) { + memset(&mz[i], 0, sizeof(struct test_case)); + snprintf(mz[i].test_params.thread_name, 16, "xthread-%d", i); + mz[i].test_params.idx = i; + } + + init_test2(&fd); + init_helper(&fdh); + + for (i = 0; i < 16; i++) { + prepare_helper(fdh, &mz[i].test_params, &mz[i].test_params.pid); + + trace_process(fd, mz[i].test_params.pid); + } + + usleep(400 * MSECS); + for (i = 0; i < 16; i++) + start_helper(fdh, mz[i].test_params.idx); + + usleep(400 * MSECS); + + /* check proc for schedule stats */ + + tracer_proc_read_values(&zstats[0], 16); + rc = tracer_proc_check_values(&zstats[0], &mz[0], 16); + + for (i = 0; i < 16; i++) + untrace_process(fd, mz[i].test_params.pid); + + for (i = 0; i < 16; i++) + stop_helper(fdh, mz[i].test_params.idx); + usleep(400 * MSECS); + + test("test_multiple_zero_stats", rc == 1, 5); + + cleanup_helper(fdh); + cleanup_test2(fd); +} + +/* + * FIXME: duplicate code + */ +static void test_multiple_nonzero_stats(void) +{ + int fd, fdh, i, rc; + struct test_case mz[16]; + struct tracer_stats zstats[16]; /* zstats, mz, wtf? 
*/ + + for (i = 0; i < 16; i++) { + memset(&mz[i], 0, sizeof(struct test_case)); + snprintf(mz[i].test_params.thread_name, 16, "xthread-%d", i); + mz[i].test_params.up = i; + mz[i].test_params.down = i; + mz[i].test_params.sched = i; + mz[i].test_params.lock = i; + mz[i].test_params.unlock = i; + mz[i].test_params.idx = i; + } + + init_test2(&fd); + init_helper(&fdh); + + for (i = 0; i < 16; i++) { + prepare_helper(fdh, &mz[i].test_params, &mz[i].test_params.pid); + + trace_process(fd, mz[i].test_params.pid); + } + + usleep(400 * MSECS); + for (i = 0; i < 16; i++) + start_helper(fdh, mz[i].test_params.idx); + + usleep(400 * MSECS); + + /* check proc for schedule stats */ + + tracer_proc_read_values(&zstats[0], 16); + rc = tracer_proc_check_values(&zstats[0], &mz[0], 16); + + for (i = 0; i < 16; i++) + untrace_process(fd, mz[i].test_params.pid); + + for (i = 0; i < 16; i++) + stop_helper(fdh, mz[i].test_params.idx); + usleep(400 * MSECS); + + test("test_multiple_nonzero_stats", rc == 1, 12); + + cleanup_helper(fdh); + cleanup_test2(fd); +} + +/* + * FIXME: duplicate code + */ +static void test_decent_alloc_free(void) +{ + int fd, fdh, i, rc, j; + struct test_case mz[32]; + struct tracer_stats zstats[32]; /* zstats, mz, wtf? 
*/ + + for (i = 0; i < 32; i++) { + memset(&mz[i], 0, sizeof(struct test_case)); + snprintf(mz[i].test_params.thread_name, 16, "xthread-%d", i); + mz[i].test_params.kcalls = 32; + for (j = 0; j < 32; j++) { + mz[i].test_params.alloc[j] = 8 * j * (i+1); + mz[i].test_params.free[j] = 1; + } + + mz[i].test_params.idx = i; + } + + init_test2(&fd); + init_helper(&fdh); + + for (i = 0; i < 32; i++) { + prepare_helper(fdh, &mz[i].test_params, &mz[i].test_params.pid); + + trace_process(fd, mz[i].test_params.pid); + } + + usleep(800 * MSECS); + for (i = 0; i < 32; i++) + start_helper(fdh, mz[i].test_params.idx); + + usleep(800 * MSECS); + + /* check proc for schedule stats */ + + tracer_proc_read_values(&zstats[0], 32); + rc = tracer_proc_check_values(&zstats[0], &mz[0], 32); + + for (i = 0; i < 32; i++) + untrace_process(fd, mz[i].test_params.pid); + + for (i = 0; i < 32; i++) + stop_helper(fdh, mz[i].test_params.idx); + usleep(800 * MSECS); + + test("test_decent_alloc_free", rc == 1, 12); + + cleanup_helper(fdh); + cleanup_test2(fd); +} + +/* + * FIXME: duplicate code + */ +static void test_mini_stress(void) +{ + int fd, fdh, i, rc; + struct test_case mz[32]; + struct tracer_stats zstats[32]; /* zstats, mz, wtf? 
*/ + + for (i = 0; i < 32; i++) { + memset(&mz[i], 0, sizeof(struct test_case)); + snprintf(mz[i].test_params.thread_name, 16, "xthread-%d", i); + mz[i].test_params.up = 512 + i; + mz[i].test_params.down = 512 + i; + mz[i].test_params.sched = i; + mz[i].test_params.lock = 128 + i; + mz[i].test_params.unlock = 128 + i; + mz[i].test_params.idx = i; + } + + init_test2(&fd); + init_helper(&fdh); + + for (i = 0; i < 32; i++) { + prepare_helper(fdh, &mz[i].test_params, &mz[i].test_params.pid); + + trace_process(fd, mz[i].test_params.pid); + } + + usleep(800 * MSECS); + for (i = 0; i < 32; i++) + start_helper(fdh, mz[i].test_params.idx); + + usleep(800 * MSECS); + + /* check proc for schedule stats */ + + tracer_proc_read_values(&zstats[0], 32); + rc = tracer_proc_check_values(&zstats[0], &mz[0], 32); + + for (i = 0; i < 32; i++) + untrace_process(fd, mz[i].test_params.pid); + + for (i = 0; i < 32; i++) + stop_helper(fdh, mz[i].test_params.idx); + usleep(800 * MSECS); + + test("test_mini_stress", rc == 1, 12); + + cleanup_helper(fdh); + cleanup_test2(fd); +} + + + + +static void (*test_fun_array[])(void) = { + NULL, + test_insmod_rmmod, + test_open_dev_tracer, + test_dev_minor_major, + test_proc_entry_exists_after_insmod, + test_proc_entry_inexistent_after_rmmod, + test_single, + test_multiple_zero_stats, + test_multiple_nonzero_stats, + test_decent_alloc_free, + test_mini_stress, +}; + +/* + * Usage message for invalid executable call. 
+ */ + +static void usage(const char *argv0) +{ + fprintf(stderr, "Usage: %s test_no\n\n", argv0); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + int test_idx; + + if (argc != 2) + usage(argv[0]); + + test_idx = atoi(argv[1]); + + if (test_idx < 1 || + test_idx >= ARRAY_SIZE(test_fun_array)) { + fprintf(stderr, "Error: test index %d is out of bounds\n", + test_idx); + exit(EXIT_FAILURE); + } + + srand(time(NULL)); + srand48(time(NULL)); + test_fun_array[test_idx](); + + return 0; +} diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/tracer_test.h b/tools/labs/templates/assignments/1-tracer/checker/_test/tracer_test.h new file mode 100644 index 00000000000000..ac189824743ee0 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/tracer_test.h @@ -0,0 +1,26 @@ +/* + * SO2 Kprobe based tracer - test suite specific header + * + * Authors: + * Daniel Baluta + */ + +#ifndef TRACER_TEST_H_ +#define TRACER_TEST_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* tracer test suite macros and structures */ +#define MODULE_NAME "tracer" +#define MODULE_FILENAME MODULE_NAME ".ko" + +#define HELPER_MODULE_NAME "tracer_helper" +#define HELPER_MODULE_FILENAME HELPER_MODULE_NAME ".ko" + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/util.h b/tools/labs/templates/assignments/1-tracer/checker/_test/util.h new file mode 100644 index 00000000000000..72eb85e8200563 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/util.h @@ -0,0 +1,71 @@ +/* + * useful structures/macros + * + * Operating Systems 2 + */ + +#ifndef UTIL_H_ +#define UTIL_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#if defined(_WIN32) + +#include + +static VOID PrintLastError(const PCHAR message) +{ + CHAR errBuff[1024]; + + FormatMessage( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, + GetLastError(), + 0, + errBuff, + 
sizeof(errBuff) - 1, + NULL); + + fprintf(stderr, "%s: %s\n", message, errBuff); +} + +#define ERR(call_description) \ + do { \ + fprintf(stderr, "(%s, %d): ", \ + __FILE__, __LINE__); \ + PrintLastError(call_description); \ + } while (0) + +#elif defined(__linux__) + +/* error printing macro */ +#define ERR(call_description) \ + do { \ + fprintf(stderr, "(%s, %d): ", \ + __FILE__, __LINE__); \ + perror(call_description); \ + } while (0) + +#else + #error "Unknown platform" +#endif + +/* print error (call ERR) and exit */ +#define DIE(assertion, call_description) \ + do { \ + if (assertion) { \ + ERR(call_description); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/1-tracer/tracer.h b/tools/labs/templates/assignments/1-tracer/tracer.h new file mode 100644 index 00000000000000..7a55257b770aaf --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/tracer.h @@ -0,0 +1,21 @@ +/* + * SO2 kprobe based tracer header file + * + * this is shared with user space + */ + +#ifndef TRACER_H__ +#define TRACER_H__ 1 + +#include +#ifndef __KERNEL__ +#include +#endif /* __KERNEL__ */ + +#define TRACER_DEV_MINOR 42 +#define TRACER_DEV_NAME "tracer" + +#define TRACER_ADD_PROCESS _IOW(_IOC_WRITE, 42, pid_t) +#define TRACER_REMOVE_PROCESS _IOW(_IOC_WRITE, 43, pid_t) + +#endif /* TRACER_H_ */ diff --git a/tools/labs/templates/assignments/2-uart/checker/Makefile b/tools/labs/templates/assignments/2-uart/checker/Makefile new file mode 100644 index 00000000000000..9cb87cb293e9cc --- /dev/null +++ b/tools/labs/templates/assignments/2-uart/checker/Makefile @@ -0,0 +1,14 @@ +CFLAGS = -Wall -g -static -m32 + +.PHONY: all run clean + +all: test solution.ko + +test: _test/test.o + $(CC) $(CFLAGS) -o $@ $^ + +solution.ko: _test/solution.ko + ln -s $< $@ + +clean: + -rm -f *~ test _test/test.o solution.ko diff --git a/tools/labs/templates/assignments/2-uart/checker/README 
b/tools/labs/templates/assignments/2-uart/checker/README new file mode 100644 index 00000000000000..67ae7234cd192c --- /dev/null +++ b/tools/labs/templates/assignments/2-uart/checker/README @@ -0,0 +1,38 @@ += UART16550 TEST SUITE == + +Test suite for UART16550 + +== FILES == + +README + * this file + +Makefile + * Makefile to build the test suite executable + +_checker + * script to run all tests defined in _test/test.c + +_test/test.c + * test suite for UART16550 + +_test/solution.ko + * kernel module implementing UART16550, + used to transmit/receive data to/from your kernel module + +== BUILDING == + +Use the Makefile to properly build the test executable: + + make + +== RUNNING == + +Copy your uart16550.ko module and _checker, test and solution.ko +to fsimg/root directory on your QEMU/KVM virtual machine. + +In order to run the test suite you can use the _checker script. + +The _checker script runs all tests and computes assignment grade: + + ./_checker diff --git a/tools/labs/templates/assignments/2-uart/checker/_checker b/tools/labs/templates/assignments/2-uart/checker/_checker new file mode 100755 index 00000000000000..a118d02b621c64 --- /dev/null +++ b/tools/labs/templates/assignments/2-uart/checker/_checker @@ -0,0 +1,4 @@ +#!/bin/sh + +insmod uart16550.ko; cat /proc/modules > /dev/kmsg; rmmod uart16550 +./test diff --git a/tools/labs/templates/assignments/2-uart/checker/_test/solution.ko b/tools/labs/templates/assignments/2-uart/checker/_test/solution.ko new file mode 100644 index 00000000000000..d9facf139b8f89 Binary files /dev/null and b/tools/labs/templates/assignments/2-uart/checker/_test/solution.ko differ diff --git a/tools/labs/templates/assignments/2-uart/checker/_test/test.c b/tools/labs/templates/assignments/2-uart/checker/_test/test.c new file mode 100644 index 00000000000000..ab42a72f4e7bc7 --- /dev/null +++ b/tools/labs/templates/assignments/2-uart/checker/_test/test.c @@ -0,0 +1,556 @@ +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include + +#include "uart16550.h" + +#define UART16550_MAJOR 42 +#define COM1_MAJOR 42 +#define COM2_MAJOR 42 +#define STR(x) #x +#define XSTR(x) STR(x) +#define OPTION_COM1_ONLY 1 +#define OPTION_COM2_ONLY 2 +#define OPTION_BOTH 3 + +#define MODULE_NAME "uart16550" +#define SOLUTION_NAME "solution" + +#define PAD_CHARS 60 + +#define UART0 "/dev/uart0" +#define UART1 "/dev/uart1" +#define UART10 "/dev/uart10" + +#define INFILE "testfile.in" +#define OUTFILE "testfile.out" + +#define fail(s) do { \ + printf("%s:%d: ", __func__, __LINE__); \ + fflush(stdout); \ + perror(s); \ + exit(EXIT_FAILURE); \ + } while (0) + +#define test(d, v, e) do_test((d), (v), (e), 0, 0) +#define not_test(d, v, e) do_test((d), (v), (e), 1, 0) +#define fatal_test(d, v, e) do_test((d), (v), (e), 0, 1) + +static int +do_test(const char *description, int value, int expected, int negate, int fatal) +{ + int num_chars; + + num_chars = printf("%s", description); + for (; num_chars < PAD_CHARS - strlen("passed"); num_chars++) + putchar('.'); + + fflush(stdout); + if (!negate) { + if (value == expected) { + printf("passed\n"); + return 0; + } else { + printf("failed\n"); + if (fatal) + exit(EXIT_FAILURE); + } + } else { + if (value != expected) { + printf("passed\n"); + return 0; + } else { + printf("failed\n"); + if (fatal) + exit(EXIT_FAILURE); + } + } + return 1; +} + +static void +test_title(const char *title) +{ + int len = strlen(title); + int pad = (PAD_CHARS - len) / 2 - 1; + int mod = (PAD_CHARS - len) % 2; + int i; + + assert(pad >= 1); + putchar('\n'); + for (i = 0; i < pad; i++) + putchar('='); + printf(" %s ", title); + for (i = 0; i < pad + mod; i++) + putchar('='); + putchar('\n'); +} + +static void +make_nodes(void) +{ + mknod(UART0, S_IFCHR, COM1_MAJOR<<8); + mknod(UART1, S_IFCHR, (COM2_MAJOR<<8) + 1); + mknod(UART10, S_IFCHR, (UART16550_MAJOR<<8)+10); +} + +static void +remove_nodes(void) +{ + unlink(UART0); 
+ unlink(UART1); + unlink(UART10); +} + +static int +test1(void) +{ + int err = 0; + + test_title("Test 1. Module insertion and removal"); + + /* Insert module with default params and test. */ + err |= fatal_test("insmod " MODULE_NAME ", default options", + system("insmod " MODULE_NAME ".ko"), 0); + err |= test("major", + system("cat /proc/devices | grep '" XSTR(COM1_MAJOR) " " MODULE_NAME "' >/dev/null 2>&1"), + 0); + err |= test("ioports COM1", + system("cat /proc/ioports | grep '03f8-03ff : " MODULE_NAME "' > /dev/null 2>&1"), + 0); + err |= test("ioports COM2", + system("cat /proc/ioports | grep '02f8-02ff : " MODULE_NAME "' > /dev/null 2>&1"), + 0); + err |= test("interrupts COM1", + system("cat /proc/interrupts | grep '4:.*" MODULE_NAME "' > /dev/null 2>&1"), + 0); + err |= test("interrupts COM2", + system("cat /proc/interrupts | grep '3:.*" MODULE_NAME "' > /dev/null 2>&1"), + 0); + err |= test("rmmod", system("rmmod " MODULE_NAME), 0); + if (err) + return err; + + /* Insert module with different major. */ + err |= fatal_test("insmod " MODULE_NAME ", major=" XSTR(COM2_MAJOR), + system("insmod " MODULE_NAME ".ko major=" XSTR(COM2_MAJOR)), 0); + err |= test("major", + system("cat /proc/devices | grep '" XSTR(COM2_MAJOR) " " MODULE_NAME "' >/dev/null 2>&1"), + 0); + err |= test("rmmod", system("rmmod " MODULE_NAME), 0); + if (err) + return err; + + /* Insert module only for COM2, check that it works side by side + * with solution. 
+ */ + err |= fatal_test("insmod " MODULE_NAME ", COM2 only", + system("insmod " MODULE_NAME ".ko option=" XSTR(OPTION_COM2_ONLY)), + 0); + err |= fatal_test("insmod " SOLUTION_NAME ", COM1 only", + system("insmod " SOLUTION_NAME ".ko option=" XSTR(OPTION_COM1_ONLY)), + 0); + err |= test("ioports COM1", + system("cat /proc/ioports | grep '03f8-03ff : " SOLUTION_NAME "' > /dev/null 2>&1"), + 0); + err |= test("ioports COM2", + system("cat /proc/ioports | grep '02f8-02ff : " MODULE_NAME "' > /dev/null 2>&1"), + 0); + err |= test("interrupts COM1", + system("cat /proc/interrupts | grep '4:.*" SOLUTION_NAME "' > /dev/null 2>&1"), + 0); + err |= test("interrupts COM2", + system("cat /proc/interrupts | grep '3:.*" MODULE_NAME "' > /dev/null 2>&1"), + 0); + err |= test("rmmod " MODULE_NAME, system("rmmod " MODULE_NAME), 0); + err |= test("rmmod " SOLUTION_NAME, system("rmmod " SOLUTION_NAME), 0); + + return err; +} + +static int +test2(void) +{ + int err = 0; + int fd; + + test_title("Test 2. Invalid parameters"); + + /* Check ioctl sanity. */ + err |= fatal_test("insmod", system("insmod " MODULE_NAME ".ko"), 0); + fd = open(UART0, O_RDWR); + if (fd == -1) + fail("open " UART0); +#define ioctl_test(n) test("invalid ioctl " XSTR((n)), \ + ioctl(fd, UART16550_IOCTL_SET_LINE, (n)), -1) + err |= ioctl_test(0xdeadbeef); + err |= ioctl_test(0x1337cafe); +#undef ioctl_test + err |= test("invalid ioctl wrong operation", ioctl(fd, 0xffff), -1); + close(fd); + err |= test("rmmod", system("rmmod " MODULE_NAME), 0); + + /* Check invalid module parameters. 
*/ + err |= not_test("insmod " MODULE_NAME ", option=0xdeadbabe", + system("insmod " MODULE_NAME ".ko option=0xdeadbabe"), + 0); + + return err; +} + +/* Speed sets: + * 0 -> 1200, 2400, 4800 + * 1 -> 9600, 19200, 38400, 56000 + * 2 -> 115200 + */ +static const struct { + int num; + unsigned char speed[4]; + int bufsizes[2]; /* min and max */ +} speed_sets[3] = { + { + .num = 3, + .speed = { UART16550_BAUD_1200, + UART16550_BAUD_2400, + UART16550_BAUD_4800, -1 }, + .bufsizes = { 128, 256 }, + }, + { + .num = 4, + .speed = { UART16550_BAUD_9600, + UART16550_BAUD_19200, + UART16550_BAUD_38400, + UART16550_BAUD_56000 }, + .bufsizes = { 256, 1024 }, + }, + { + .num = 1, + .speed = { UART16550_BAUD_115200, -1, -1, -1 }, + .bufsizes = { 2048, 2048 }, + }, +}; + +static void +gen_params(struct uart16550_line_info *line, int speed_set) +{ + int r; + + line->baud = speed_sets[speed_set].speed[rand() % + speed_sets[speed_set].num]; + line->len = UART16550_LEN_8; + line->stop = rand() % 2 * 4; + r = rand() % 4; + line->par = r < 2 ? r*8 : 0x18 + (r-2) * 8; +} + +int do_read(int fd, unsigned char *buffer, int size) +{ + int n, from = 0; + + while (1) { + n = read(fd, &buffer[from], size - from); + if (n <= 0) + return -1; + if (n + from == size) + return 0; + from += n; + } +} + +int do_write(int fd, unsigned char *buffer, unsigned int size) +{ + int n, from = 0; + + while (1) { + n = write(fd, &buffer[from], size - from); + if (n <= 0) { + perror("write"); + return -1; + } + if (n + from == size) + return 0; + from += n; + } +} + +static int +gen_test_file(char *fname, int speed_set) +{ + int size, min, max; + char comm[1024]; + + min = speed_sets[speed_set].bufsizes[0]; + max = speed_sets[speed_set].bufsizes[1]; + size = (min == max) ? 
min : rand() % (max - min) + min; + snprintf(comm, sizeof(comm), + "dd if=/dev/urandom of=%s bs=1 count=%d >/dev/null 2>/dev/null", + fname, + size); + if (system(comm)) + fprintf(stderr, "failed to generate random file (%s)\n", comm); + return size; +} + +static void +copy_file(int fdr, int fdw, int len) +{ +#define COPY_BUF_SIZE 128 + unsigned char buf[COPY_BUF_SIZE]; + + do { + int partial, rc; + + partial = len < COPY_BUF_SIZE ? len : COPY_BUF_SIZE; + if (partial == 0) + break; + rc = read(fdr, buf, partial); + if (rc == 0) + break; + if (rc == -1) + fail("read"); + len -= rc; + rc = do_write(fdw, buf, rc); + if (rc < 0) + fail("write"); + } while (1); +} + +static int +copy_test(int fd0, int fd1, int speed_set) +{ + pid_t rpid, wpid; + int len, status, rc, fd; + + len = gen_test_file(INFILE, speed_set); + rpid = fork(); + switch (rpid) { + case -1: + /* waitpid(-1, ...) below would otherwise wait for any child */ + fail("fork"); + break; + case 0: + fd = open(INFILE, O_RDONLY); + if (fd < 0) + fail("open " INFILE); + copy_file(fd, fd0, len); + close(fd); + exit(EXIT_SUCCESS); + break; + default: + break; + } + + wpid = fork(); + switch (wpid) { + case -1: + fail("fork"); + break; + case 0: + fd = open(OUTFILE, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + fail("open " OUTFILE); + copy_file(fd1, fd, len); + close(fd); + exit(EXIT_SUCCESS); + break; + default: + break; + } + + rc = waitpid(rpid, &status, 0); + if (rc < 0) + return rc; + if (WEXITSTATUS(status)) + return WEXITSTATUS(status); + + rc = waitpid(wpid, &status, 0); + if (rc < 0) + return rc; + if (WEXITSTATUS(status)) + return WEXITSTATUS(status); + + return system("diff " INFILE " " OUTFILE "> /dev/null 2> /dev/null"); +} + +static int +generic_test(const char *reader, const char *writer, int speed_set, + int num_tests) +{ + int fd0, fd1, i, err = 0; + char dbuf[1024], cbuf[1024]; + struct uart16550_line_info uli; + + if (reader != writer) { + sprintf(dbuf, "insmod %s", reader); + sprintf(cbuf, "insmod %s.ko option=%d", + reader, OPTION_COM2_ONLY); + fatal_test(dbuf, system(cbuf), 0); + sprintf(dbuf, "insmod %s", writer); + sprintf(cbuf, 
"insmod %s.ko option=%d", + writer, OPTION_COM1_ONLY); + fatal_test(dbuf, system(cbuf), 0); + } else { + sprintf(dbuf, "insmod %s", reader); + sprintf(cbuf, "insmod %s.ko", reader); + fatal_test(dbuf, system(cbuf), 0); + } + + gen_params(&uli, speed_set); + fd0 = open(UART0, O_WRONLY); + if (fd0 == -1) + fail("open " UART0); + fd1 = open(UART1, O_RDONLY); + if (fd1 == -1) + fail("open " UART1); + err |= test("ioctl reader", + ioctl(fd1, UART16550_IOCTL_SET_LINE, &uli), 0); + err |= test("ioctl writer", + ioctl(fd0, UART16550_IOCTL_SET_LINE, &uli), 0); + + for (i = 0; i < num_tests; i++) { + sprintf(dbuf, "test %02d", i + 1); + test(dbuf, copy_test(fd0, fd1, speed_set), 0); + } + + close(fd0); + close(fd1); + + if (reader != writer) { + sprintf(dbuf, "rmmod %s", reader); + sprintf(cbuf, "rmmod %s.ko", reader); + fatal_test(dbuf, system(cbuf), 0); + sprintf(dbuf, "rmmod %s", writer); + sprintf(cbuf, "rmmod %s.ko", writer); + fatal_test(dbuf, system(cbuf), 0); + } else { + sprintf(dbuf, "rmmod %s", reader); + sprintf(cbuf, "rmmod %s.ko", reader); + fatal_test(dbuf, system(cbuf), 0); + } + + return err; +} + +#define choose_one(rd, wr) do { \ + int r = rand() % 2; \ + if (r == 0) { \ + rd = MODULE_NAME; \ + wr = SOLUTION_NAME; \ + } else { \ + rd = SOLUTION_NAME; \ + wr = MODULE_NAME; \ + } \ + } while (0) + +static int +test3(void) +{ + const char *rd, *wr; + + rd = MODULE_NAME; + wr = SOLUTION_NAME; + test_title("Test 3. Read, small speed"); + return generic_test(rd, wr, 0, 5); +} + +static int +test4(void) +{ + const char *rd, *wr; + + rd = SOLUTION_NAME; + wr = MODULE_NAME; + test_title("Test 4. Write, small speed"); + return generic_test(rd, wr, 0, 5); +} + +static int +test5(void) +{ + const char *rd, *wr; + + rd = wr = MODULE_NAME; + test_title("Test 5. Back-to-back, small speed"); + return generic_test(rd, wr, 0, 5); +} + +static int +test6(void) +{ + const char *rd, *wr; + + choose_one(rd, wr); + test_title("Test 6. 
Read/Write, medium speed"); + return generic_test(rd, wr, 1, 5); +} + +static int +test7(void) +{ + const char *rd, *wr; + + rd = wr = MODULE_NAME; + test_title("Test 7. Back-to-back, medium speed"); + return generic_test(rd, wr, 1, 5); +} + +static int +test8(void) +{ + const char *rd, *wr; + + choose_one(rd, wr); + test_title("Test 8. Read/Write, high speed"); + return generic_test(rd, wr, 2, 5); +} + +static int +test9(void) +{ + const char *rd, *wr; + + rd = wr = MODULE_NAME; + test_title("Test 9. Back-to-back, high speed"); + return generic_test(rd, wr, 2, 5); +} + +int +main(void) +{ + int num_passed = 0; + const int total = 9; + + srand(time(NULL)); + make_nodes(); + + if (test1() == 0) + num_passed++; + if (test2() == 0) + num_passed++; + if (test3() == 0) + num_passed++; + if (test4() == 0) + num_passed++; + if (test5() == 0) + num_passed++; + if (test6() == 0) + num_passed++; + if (test7() == 0) + num_passed++; + if (test8() == 0) + num_passed++; + if (test9() == 0) + num_passed++; + + remove_nodes(); + unlink(INFILE); + unlink(OUTFILE); + printf("\nFinal score: %d/%d\n", num_passed, total); + + return 0; +} + +/* Extra 2 lines so the file is the proper size. 
*/ diff --git a/tools/labs/templates/assignments/2-uart/checker/_test/uart16550.h b/tools/labs/templates/assignments/2-uart/checker/_test/uart16550.h new file mode 100644 index 00000000000000..73008921925769 --- /dev/null +++ b/tools/labs/templates/assignments/2-uart/checker/_test/uart16550.h @@ -0,0 +1,46 @@ +#ifndef UART16550_H +#define UART16550_H + +#define OPTION_COM1 1 +#define OPTION_COM2 2 +#define OPTION_BOTH 3 + +#define UART16550_COM1_SELECTED 0x01 +#define UART16550_COM2_SELECTED 0x02 + +#define MAX_NUMBER_DEVICES 2 + +#ifndef _UART16550_REGS_H + +#define UART16550_BAUD_1200 96 +#define UART16550_BAUD_2400 48 +#define UART16550_BAUD_4800 24 +#define UART16550_BAUD_9600 12 +#define UART16550_BAUD_19200 6 +#define UART16550_BAUD_38400 3 +#define UART16550_BAUD_56000 2 +#define UART16550_BAUD_115200 1 + +#define UART16550_LEN_5 0x00 +#define UART16550_LEN_6 0x01 +#define UART16550_LEN_7 0x02 +#define UART16550_LEN_8 0x03 + +#define UART16550_STOP_1 0x00 +#define UART16550_STOP_2 0x04 + +#define UART16550_PAR_NONE 0x00 +#define UART16550_PAR_ODD 0x08 +#define UART16550_PAR_EVEN 0x18 +#define UART16550_PAR_STICK 0x20 + +#endif + +#define UART16550_IOCTL_SET_LINE 1 + +struct uart16550_line_info { + unsigned char baud, len, par, stop; +}; + +#endif + diff --git a/tools/labs/templates/assignments/2-uart/uart16550.h b/tools/labs/templates/assignments/2-uart/uart16550.h new file mode 100644 index 00000000000000..e47e82945404a4 --- /dev/null +++ b/tools/labs/templates/assignments/2-uart/uart16550.h @@ -0,0 +1,48 @@ +#ifndef _UART16550_H +#define _UART16550_H + +#define OPTION_COM1 1 +#define OPTION_COM2 2 +#define OPTION_BOTH 3 + +#define UART16550_COM1_SELECTED 0x01 +#define UART16550_COM2_SELECTED 0x02 + +#define MAX_NUMBER_DEVICES 2 + +#ifndef _UART16550_REGS_H + + + +#define UART16550_BAUD_1200 96 +#define UART16550_BAUD_2400 48 +#define UART16550_BAUD_4800 24 +#define UART16550_BAUD_9600 12 +#define UART16550_BAUD_19200 6 +#define UART16550_BAUD_38400 3 
+#define UART16550_BAUD_56000 2 +#define UART16550_BAUD_115200 1 + +#define UART16550_LEN_5 0x00 +#define UART16550_LEN_6 0x01 +#define UART16550_LEN_7 0x02 +#define UART16550_LEN_8 0x03 + +#define UART16550_STOP_1 0x00 +#define UART16550_STOP_2 0x04 + +#define UART16550_PAR_NONE 0x00 +#define UART16550_PAR_ODD 0x08 +#define UART16550_PAR_EVEN 0x18 +#define UART16550_PAR_STICK 0x20 + +#endif + +#define UART16550_IOCTL_SET_LINE 1 + +struct uart16550_line_info { + unsigned char baud, len, par, stop; +}; + + +#endif diff --git a/tools/labs/templates/assignments/3-raid/Kbuild b/tools/labs/templates/assignments/3-raid/Kbuild new file mode 100644 index 00000000000000..98b113c88dccfa --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/Kbuild @@ -0,0 +1,2 @@ +EXTRA_CFLAGS = -Wall -Wno-unused-function -g +obj-m = ssr.o diff --git a/tools/labs/templates/assignments/3-raid/checker/Makefile b/tools/labs/templates/assignments/3-raid/checker/Makefile new file mode 100644 index 00000000000000..9eea19fbf00cbb --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/Makefile @@ -0,0 +1,14 @@ +CFLAGS = -Wall -Wextra -g -m32 +LDFLAGS = -static -m32 + +.PHONY: all test clean + +all: + +test: + $(MAKE) -C _test/ + ln -sf _test/run-test run-test + +clean: + -$(MAKE) -C _test/ clean + rm -rf run-test diff --git a/tools/labs/templates/assignments/3-raid/checker/README b/tools/labs/templates/assignments/3-raid/checker/README new file mode 100644 index 00000000000000..8fb41fd9b25275 --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/README @@ -0,0 +1,40 @@ += SOFTWARE RAID TEST SUITE == + +Test suite for software RAID + +== FILES == + +README + * this file + +Makefile + * Makefile for automating the build process + +_checker + * script to run all tests defined in _test/test.c + +_test/test.c + * test suite for software RAID + +== RUNNING == + +In order to run the test suite you can either use the _checker +script or run the run-test executable. 
+ +The kernel module must be named ssr.ko and must be in the current folder. + +The run-test executable has to be in the current folder. You can create +a link using: + + ln -sf _test/run-test run-test + +The _checker script runs all tests and computes assignment grade. You +can use any of the two commands below. + + make test + ./_checker + +In order to run a specific test, pass the test number (1 .. 78) to the +run-test executable. + + ./run-test 5 diff --git a/tools/labs/templates/assignments/3-raid/checker/_checker b/tools/labs/templates/assignments/3-raid/checker/_checker new file mode 100755 index 00000000000000..da4e4058d736be --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/_checker @@ -0,0 +1,4 @@ +#!/bin/sh + +/bin/dmesg -c > /dev/null 2>&1 +./run-test diff --git a/tools/labs/templates/assignments/3-raid/checker/_test/Makefile b/tools/labs/templates/assignments/3-raid/checker/_test/Makefile new file mode 100644 index 00000000000000..a8a98a2cb2220d --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/_test/Makefile @@ -0,0 +1,15 @@ +CFLAGS = -Wall -Wextra -Wno-unused-function -g -m32 +LDFLAGS = -static -m32 + +.PHONY: all clean + +all: run-test + +run-test: run-test.o test.o + +run-test.o: run-test.c run-test.h + +test.o: test.c run-test.h + +clean: + -rm -f *~ test.o run-test.o run-test test diff --git a/tools/labs/templates/assignments/3-raid/checker/_test/run-test.c b/tools/labs/templates/assignments/3-raid/checker/_test/run-test.c new file mode 100644 index 00000000000000..0ad0e2b256fb37 --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/_test/run-test.c @@ -0,0 +1,104 @@ +#include +#include +#include +#include +#include + +#include "run-test.h" + +/* Enable/disable exiting when program fails. 
*/ +//#define EXIT_IF_FAIL + +static size_t test_index; +static size_t total_points = 0; + +static void test_do_fail(size_t points) +{ + printf("failed [ 0/%3zu]\n", points); +#ifdef EXIT_IF_FAIL + exit(EXIT_FAILURE); +#endif +} + +static void test_do_pass(size_t points) +{ + total_points += points; + printf("passed [%3zu/%3zu]\n", points, points); +} + +void basic_test(int condition) +{ + size_t i; + char *description = test_array[test_index].description; + size_t desc_len = strlen(description); + size_t points = test_array[test_index].points; + + printf("(%3zu) %s", test_index + 1, description); + for (i = desc_len; i < 56; i++) /* pad with dots up to column 56; counting up from desc_len cannot wrap around when the description is longer than 56 characters */ + printf("."); + if (condition) + test_do_pass(points); + else + test_do_fail(points); +} + +static void print_test_total(void) +{ + size_t i; + + for (i = 0; i < 62; i++) + printf(" "); + printf("Total: [%3zu/%3zu]\n", total_points, max_points); +} + +static void run_test(void) +{ + test_array[test_index].function(); +} + +int main(int argc, char **argv) +{ + size_t num_tests = get_num_tests(); + + if (argc > 2) { + fprintf(stderr, "Usage: %s [test_number]\n", argv[0]); + fprintf(stderr, " 1 <= test_number <= %zu\n", num_tests); + exit(EXIT_FAILURE); + } + + /* Randomize time quantums. */ + srand(time(NULL)); + + /* In case of no arguments run all tests. */ + if (argc == 1) { + init_world(); + for (test_index = 0; test_index < num_tests; test_index++) + run_test(); + print_test_total(); + cleanup_world(); + return 0; + } + + errno = 0; /* strtoul() sets errno on failure but never clears it; reset it so the check below cannot see a stale value */ + test_index = strtoul(argv[1], NULL, 10); /* if provided, argument is test index */ + if (errno == EINVAL || errno == ERANGE) { + fprintf(stderr, "%s is not a number\n", argv[1]); + exit(EXIT_FAILURE); + } + + if (test_index == 0 || test_index > num_tests) { + fprintf(stderr, "Error: Test index is out of range " + "(1 <= test_index <= %zu).\n", num_tests); + exit(EXIT_FAILURE); + } + + /* test_index is one less than what the user provides. */ + test_index--; + + /* Run test_index test. 
*/ + init_world(); + run_test(); + cleanup_world(); + + return 0; +} diff --git a/tools/labs/templates/assignments/3-raid/checker/_test/run-test.h b/tools/labs/templates/assignments/3-raid/checker/_test/run-test.h new file mode 100644 index 00000000000000..e4d64f6aa1b375 --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/_test/run-test.h @@ -0,0 +1,25 @@ +#ifndef _RUN_TEST_H_ +#define _RUN_TEST_H_ + +/* functions exported by the framework */ +void basic_test(int condition); + +/* function exported by the test */ +void init_world(void); +void cleanup_world(void); +size_t get_num_tests(void); + +/* test function prototype */ +typedef void (test_f)(void); + +struct run_test_t { + test_f *function; /* test/evaluation function */ + char *description; /* test description */ + size_t points; /* points for each test */ +}; + +/* Use test_index to pass through test_array. */ +extern struct run_test_t test_array[]; +extern size_t max_points; + +#endif /* _RUN_TEST_H_ */ diff --git a/tools/labs/templates/assignments/3-raid/checker/_test/ssr.h b/tools/labs/templates/assignments/3-raid/checker/_test/ssr.h new file mode 100644 index 00000000000000..5aa4107fb15825 --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/_test/ssr.h @@ -0,0 +1,26 @@ +/* + * Simple Software Raid - Linux header file + */ + +#ifndef SSR_H_ +#define SSR_H_ 1 + +#define SSR_MAJOR 240 +#define SSR_FIRST_MINOR 0 +#define SSR_NUM_MINORS 1 + +#define PHYSICAL_DISK1_NAME "/dev/vdb" +#define PHYSICAL_DISK2_NAME "/dev/vdc" + +/* sector size */ +#define KERNEL_SECTOR_SIZE 512 + +/* physical partition size - 95 MB (more than this results in error) */ +#define LOGICAL_DISK_NAME "/dev/ssr" +#define LOGICAL_DISK_SIZE (95 * 1024 * 1024) +#define LOGICAL_DISK_SECTORS ((LOGICAL_DISK_SIZE) / (KERNEL_SECTOR_SIZE)) + +/* sync data */ +#define SSR_IOCTL_SYNC 1 + +#endif diff --git a/tools/labs/templates/assignments/3-raid/checker/_test/test.c 
b/tools/labs/templates/assignments/3-raid/checker/_test/test.c new file mode 100644 index 00000000000000..b6a36ec618aee1 --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/_test/test.c @@ -0,0 +1,1769 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "run-test.h" +#include "ssr.h" + +#define SSR_BASE_NAME "ssr" +#define SSR_LIN_EXT ".ko" +#define SSR_MOD_NAME SSR_BASE_NAME SSR_LIN_EXT + +#define CRC_SIZE 4 + +#define ONE_SECTOR KERNEL_SECTOR_SIZE +#define ONE_PAGE 4096 +#define TWO_PAGES 8192 +#define TEN_PAGES 40960 +#define ONE_MEG 1048576 + +/* Read/write buffers. */ +static unsigned char *log_rd_buf, *log_wr_buf; +static unsigned char *phys1_rd_buf, *phys1_wr_buf; +static unsigned char *phys2_rd_buf, *phys2_wr_buf; +static unsigned char *log_rd_crc, *log_wr_crc; +static unsigned char *phys1_rd_crc, *phys1_wr_crc; +static unsigned char *phys2_rd_crc, *phys2_wr_crc; + +/* File descriptors. */ +static int log_fd, phys1_fd, phys2_fd; + +enum { + START = 0, + MIDDLE, + END +}; + +enum { + PHYS_FILL_DATA = 'P', + LOG_FILL_DATA = 'L', + CORRUPT_DATA = 'C', + PHYS1_DISK_DIRTY_DATA = 'a', + PHYS1_BUF_DIRTY_DATA = 'A', + PHYS2_DISK_DIRTY_DATA = 'b', + PHYS2_BUF_DIRTY_DATA = 'B', + LOG_DISK_DIRTY_DATA = 'd', + LOG_BUF_DIRTY_DATA = 'D', +}; + +/* + * "upgraded" read routine + */ + +static ssize_t xread(int fd, void *buffer, size_t len) +{ + ssize_t ret; + ssize_t n; + + n = 0; + while (n < (ssize_t) len) { + ret = read(fd, (char *) buffer + n, len - n); + if (ret < 0) + return -1; + if (ret == 0) + break; + n += ret; + } + + return n; +} + +/* + * "upgraded" write routine + */ + +static ssize_t xwrite(int fd, const void *buffer, size_t len) +{ + ssize_t ret; + ssize_t n; + + n = 0; + while (n < (ssize_t) len) { + ret = write(fd, (const char *) buffer + n, len - n); + if (ret < 0) + return -1; + if (ret == 0) + break; + n += ret; + } + + return n; +} + +/* + * Compute CRC32. 
+ */ + +static unsigned int crc32(unsigned int seed, + const unsigned char *p, unsigned int len) +{ + size_t i; + unsigned int crc = seed; + + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0); + } + + return crc; +} + +static void compute_crc(const void *data_buffer, void *crc_buffer, size_t len) +{ + size_t i; + unsigned int crc; + + for (i = 0; i < len; i += ONE_SECTOR) { + crc = crc32(0, (const unsigned char *) data_buffer + i, ONE_SECTOR); + memcpy((char *) crc_buffer + i / ONE_SECTOR * CRC_SIZE, + &crc, CRC_SIZE); + } +} + +static off_t data_offset_from_whence(int whence, size_t len) +{ + switch (whence) { + case START: + return 0; + case MIDDLE: + return LOGICAL_DISK_SIZE / 2 - len; + case END: + return LOGICAL_DISK_SIZE - len; + default: + return -1; + } +} + +static off_t crc_offset_from_whence(int whence, size_t len) +{ + off_t data_offset = data_offset_from_whence(whence, len); + + return LOGICAL_DISK_SIZE + data_offset / ONE_SECTOR * CRC_SIZE; +} + +static void fill_buffer(void *buffer, int c, size_t len) +{ + memset(buffer, c, len); +} + +static void log_fill_buffer(size_t len) +{ + fill_buffer(log_wr_buf, LOG_FILL_DATA, len); +} + +static void phys_fill_buffer(size_t len) +{ + fill_buffer(phys1_wr_buf, PHYS_FILL_DATA, len); + fill_buffer(phys2_wr_buf, PHYS_FILL_DATA, len); +} + +static ssize_t read_whence_data(int fd, void *buffer, size_t len, int whence) +{ + off_t offset = data_offset_from_whence(whence, len); + + lseek(fd, offset, SEEK_SET); + return xread(fd, buffer, len); +} + +static ssize_t read_whence_crc(int fd, void *crc_buffer, size_t data_len, + int whence) +{ + off_t offset = crc_offset_from_whence(whence, data_len); + + lseek(fd, offset, SEEK_SET); + return xread(fd, crc_buffer, data_len / ONE_SECTOR * CRC_SIZE); +} + +static ssize_t write_whence_data(int fd, const void *buffer, + size_t len, int whence) +{ + off_t offset = data_offset_from_whence(whence, len); + + lseek(fd, 
offset, SEEK_SET); + return xwrite(fd, buffer, len); +} + +static ssize_t write_whence_crc(int fd, void *crc_buffer, size_t data_len, + int whence) +{ + off_t offset = crc_offset_from_whence(whence, data_len); + + lseek(fd, offset, SEEK_SET); + return xwrite(fd, crc_buffer, data_len / ONE_SECTOR * CRC_SIZE); +} + +static ssize_t log_read_whence(size_t len, int whence) +{ + ssize_t n; + + n = read_whence_data(log_fd, log_rd_buf, len, whence); + if (n < 0) + return -1; + compute_crc(log_rd_buf, log_rd_crc, len); + return n; +} + +static ssize_t log_write_whence(size_t len, int whence) +{ + compute_crc(log_wr_buf, log_wr_crc, len); + return write_whence_data(log_fd, log_wr_buf, len, whence); +} + +static ssize_t phys_read_whence(size_t id, size_t len, int whence) +{ + ssize_t n_data, n_crc; + int fd = ((id == 1) ? phys1_fd : phys2_fd); + + unsigned char *data_buf = ((id == 1) ? phys1_rd_buf : phys2_rd_buf); + unsigned char *crc_buf = ((id == 1) ? phys1_rd_crc : phys2_rd_crc); + + n_data = read_whence_data(fd, data_buf, len, whence); + if (n_data < 0) + return -1; + n_crc = read_whence_crc(fd, crc_buf, len, whence); + if (n_crc < 0) + return -1; + return n_data; +} + +static ssize_t phys_write_whence(size_t id, size_t len, int whence) +{ + ssize_t n_data, n_crc; + int fd = ((id == 1) ? phys1_fd : phys2_fd); + unsigned char *data_buf = ((id == 1) ? phys1_wr_buf : phys2_wr_buf); + unsigned char *crc_buf = ((id == 1) ? 
phys1_wr_crc : phys2_wr_crc); + + compute_crc(data_buf, crc_buf, len); + n_data = write_whence_data(fd, data_buf, len, whence); + if (n_data < 0) + return -1; + n_crc = write_whence_crc(fd, crc_buf, len, whence); + if (n_crc < 0) + return -1; + return n_data; +} + +static void corrupt_buffer(void *buffer, size_t sectors) +{ + size_t i; + + for (i = 0; i < sectors; i++) + ((unsigned char *) buffer)[i * ONE_SECTOR] = CORRUPT_DATA; +} + +static ssize_t phys_corrupt_and_write_whence(size_t id, size_t len, + size_t sectors, int whence) +{ + ssize_t n_data, n_crc; + int fd = ((id == 1) ? phys1_fd : phys2_fd); + unsigned char *data_buf = ((id == 1) ? phys1_wr_buf : phys2_wr_buf); + unsigned char *crc_buf = ((id == 1) ? phys1_wr_crc : phys2_wr_crc); + + compute_crc(data_buf, crc_buf, len); + corrupt_buffer(data_buf, sectors); + n_data = write_whence_data(fd, data_buf, len, whence); + if (n_data < 0) + return -1; + n_crc = write_whence_crc(fd, crc_buf, len, whence); + if (n_crc < 0) + return -1; + return n_data; +} + +static ssize_t log_read_start(size_t len) +{ + return log_read_whence(len, START); +} + +static ssize_t log_read_middle(size_t len) +{ + return log_read_whence(len, MIDDLE); +} + +static ssize_t log_read_end(size_t len) +{ + return log_read_whence(len, END); +} + +static ssize_t log_write_start(size_t len) +{ + return log_write_whence(len, START); +} + +static ssize_t log_write_middle(size_t len) +{ + return log_write_whence(len, MIDDLE); +} + +static ssize_t log_write_end(size_t len) +{ + return log_write_whence(len, END); +} + +static ssize_t phys1_read_start(size_t len) +{ + return phys_read_whence(1, len, START); +} + +#if 0 +static ssize_t phys1_read_middle(size_t len) +{ + return phys_read_whence(1, len, MIDDLE); +} + +static ssize_t phys1_read_end(size_t len) +{ + return phys_read_whence(1, len, END); +} +#endif + +static ssize_t phys1_write_start(size_t len) +{ + return phys_write_whence(1, len, START); +} + +static ssize_t 
phys1_corrupt_and_write_start(size_t len, size_t sectors) +{ + return phys_corrupt_and_write_whence(1, len, sectors, START); +} + +#if 0 +static ssize_t phys1_write_middle(size_t len) +{ + return phys_write_whence(1, len, MIDDLE); +} + +static ssize_t phys1_write_end(size_t len) +{ + return phys_write_whence(1, len, END); +} +#endif + +static ssize_t phys2_read_start(size_t len) +{ + return phys_read_whence(2, len, START); +} + +#if 0 +static ssize_t phys2_read_middle(size_t len) +{ + return phys_read_whence(2, len, MIDDLE); +} + +static ssize_t phys2_read_end(size_t len) +{ + return phys_read_whence(2, len, END); +} +#endif + +static ssize_t phys2_write_start(size_t len) +{ + return phys_write_whence(2, len, START); +} + +static ssize_t phys2_corrupt_and_write_start(size_t len, size_t sectors) +{ + return phys_corrupt_and_write_whence(2, len, sectors, START); +} + +#if 0 +static ssize_t phys2_write_middle(size_t len) +{ + return phys_write_whence(2, len, MIDDLE); +} + +static ssize_t phys2_write_end(size_t len) +{ + return phys_write_whence(2, len, END); +} +#endif + +static int cmp_data_log_rd_phys1_wr(size_t len) +{ + return memcmp(log_rd_buf, phys1_wr_buf, len); +} + +static int cmp_data_log_rd_phys2_wr(size_t len) +{ + return memcmp(log_rd_buf, phys2_wr_buf, len); +} + +static int cmp_data_log_rd_phys1_rd(size_t len) +{ + return memcmp(log_rd_buf, phys1_rd_buf, len); +} + +static int cmp_data_log_rd_phys2_rd(size_t len) +{ + return memcmp(log_rd_buf, phys2_rd_buf, len); +} + +static int cmp_data_log_wr_phys1_rd(size_t len) +{ + return memcmp(log_wr_buf, phys1_rd_buf, len); +} + +static int cmp_data_log_wr_phys2_rd(size_t len) +{ + return memcmp(log_wr_buf, phys2_rd_buf, len); +} + +static int cmp_crc_log_rd_phys1_wr(size_t data_len) +{ + return memcmp(log_rd_crc, phys1_wr_crc, data_len / ONE_SECTOR * CRC_SIZE); +} + +static int cmp_crc_log_rd_phys2_wr(size_t data_len) +{ + return memcmp(log_rd_crc, phys2_wr_crc, data_len / ONE_SECTOR * CRC_SIZE); +} + +static 
int cmp_crc_log_rd_phys1_rd(size_t data_len) +{ + return memcmp(log_rd_crc, phys1_rd_crc, data_len / ONE_SECTOR * CRC_SIZE); +} + +static int cmp_crc_log_rd_phys2_rd(size_t data_len) +{ + return memcmp(log_rd_crc, phys2_rd_crc, data_len / ONE_SECTOR * CRC_SIZE); +} + +static int cmp_crc_log_wr_phys1_rd(size_t data_len) +{ + return memcmp(log_wr_crc, phys1_rd_crc, data_len / ONE_SECTOR * CRC_SIZE); +} + +static int cmp_crc_log_wr_phys2_rd(size_t data_len) +{ + return memcmp(log_wr_crc, phys2_rd_crc, data_len / ONE_SECTOR * CRC_SIZE); +} + +static void drop_caches(void) +{ + int fd; + char buf[] = "1\n"; + + fd = open("/proc/sys/vm/drop_caches", O_WRONLY); + assert(fd >= 0); + write(fd, buf, strlen(buf)); + close(fd); +} + +static void flush_disk_buffers(void) +{ + sync(); + //system("/bin/echo 1 > /proc/sys/vm/drop_caches"); + drop_caches(); +} + +static void dump_data(const void *buf, size_t len, const char *header) +{ + size_t i; + + printf("%s:", header); + for (i = 0; i < len / sizeof(unsigned int); i++) { + if (i % 4 == 0) + printf("\n\t"); + printf(" %08x", ((unsigned int *) buf)[i]); + } + printf("\n\n"); +} + +void init_world(void) +{ + /* Cleanup if required. 
*/ + flush_disk_buffers(); + system("/sbin/rmmod " SSR_BASE_NAME " > /dev/null 2>&1"); + system("/bin/cat /proc/devices | /bin/grep " SSR_BASE_NAME " > /dev/null"); + system("/bin/rm -f " LOGICAL_DISK_NAME); + + assert(system("/sbin/insmod " SSR_MOD_NAME) == 0); + assert(system("/bin/cat /proc/devices | /bin/grep " SSR_BASE_NAME + " > /dev/null") == 0); + assert(access(PHYSICAL_DISK1_NAME, F_OK) == 0); + assert(access(PHYSICAL_DISK2_NAME, F_OK) == 0); + assert(access(LOGICAL_DISK_NAME, F_OK) == 0); + + log_rd_buf = calloc(1024 * 1024, 1); /* data buffers: ONE_MEG bytes, the largest transfer the tests issue */ + assert(log_rd_buf != NULL); + log_wr_buf = calloc(1024 * 1024, 1); + assert(log_wr_buf != NULL); /* check the buffer that was just allocated, not log_rd_buf again */ + phys1_rd_buf = calloc(1024 * 1024, 1); + assert(phys1_rd_buf != NULL); + phys1_wr_buf = calloc(1024 * 1024, 1); + assert(phys1_wr_buf != NULL); + phys2_rd_buf = calloc(1024 * 1024, 1); + assert(phys2_rd_buf != NULL); + phys2_wr_buf = calloc(1024 * 1024, 1); + assert(phys2_wr_buf != NULL); + log_rd_crc = calloc(8 * 1024, 1); /* CRC buffers: ONE_MEG / ONE_SECTOR * CRC_SIZE bytes, one checksum per sector */ + assert(log_rd_crc != NULL); + log_wr_crc = calloc(8 * 1024, 1); + assert(log_wr_crc != NULL); /* check the buffer that was just allocated, not log_rd_crc again */ + phys1_rd_crc = calloc(8 * 1024, 1); + assert(phys1_rd_crc != NULL); + phys1_wr_crc = calloc(8 * 1024, 1); + assert(phys1_wr_crc != NULL); + phys2_rd_crc = calloc(8 * 1024, 1); + assert(phys2_rd_crc != NULL); + phys2_wr_crc = calloc(8 * 1024, 1); + assert(phys2_wr_crc != NULL); +} + +void cleanup_world(void) +{ + flush_disk_buffers(); + system("/sbin/rmmod " SSR_BASE_NAME); + system("/bin/cat /proc/devices | /bin/grep " SSR_BASE_NAME " > /dev/null"); + system("/bin/rm -f " LOGICAL_DISK_NAME); + free(log_rd_buf); free(log_wr_buf); + free(phys1_rd_buf); free(phys1_wr_buf); + free(phys2_rd_buf); free(phys2_wr_buf); + free(log_rd_crc); free(log_wr_crc); + free(phys1_rd_crc); free(phys1_wr_crc); + free(phys2_rd_crc); free(phys2_wr_crc); +} + +static void make_disks_dirty(void) +{ + fill_buffer(phys1_wr_buf, PHYS1_DISK_DIRTY_DATA, ONE_MEG); + fill_buffer(phys1_wr_crc, PHYS1_DISK_DIRTY_DATA, ONE_MEG / ONE_SECTOR * 
CRC_SIZE); + fill_buffer(phys2_wr_buf, PHYS2_DISK_DIRTY_DATA, ONE_MEG); + fill_buffer(phys2_wr_crc, PHYS2_DISK_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); + phys1_write_start(ONE_MEG); + phys2_write_start(ONE_MEG); +} + +static void make_buffers_dirty(void) +{ + fill_buffer(phys1_wr_buf, PHYS1_BUF_DIRTY_DATA, ONE_MEG); + fill_buffer(phys1_wr_crc, PHYS1_BUF_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); + fill_buffer(phys1_rd_buf, PHYS1_BUF_DIRTY_DATA, ONE_MEG); + fill_buffer(phys1_rd_crc, PHYS1_BUF_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); + fill_buffer(phys2_wr_buf, PHYS2_BUF_DIRTY_DATA, ONE_MEG); + fill_buffer(phys2_wr_crc, PHYS2_BUF_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); + fill_buffer(phys2_rd_buf, PHYS2_BUF_DIRTY_DATA, ONE_MEG); + fill_buffer(phys2_rd_crc, PHYS2_BUF_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); + fill_buffer(log_wr_buf, LOG_BUF_DIRTY_DATA, ONE_MEG); + fill_buffer(log_wr_crc, LOG_BUF_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); + fill_buffer(log_rd_buf, LOG_BUF_DIRTY_DATA, ONE_MEG); + fill_buffer(log_rd_crc, LOG_BUF_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); +} + +static void init_test(void) +{ + flush_disk_buffers(); + log_fd = open(LOGICAL_DISK_NAME, O_RDWR); + assert(log_fd >= 0); + phys1_fd = open(PHYSICAL_DISK1_NAME, O_RDWR); + assert(phys1_fd >= 0); + phys2_fd = open(PHYSICAL_DISK2_NAME, O_RDWR); + assert(phys2_fd >= 0); + make_disks_dirty(); + make_buffers_dirty(); + flush_disk_buffers(); +} + +static void cleanup_test(void) +{ + close(log_fd); + close(phys1_fd); + close(phys2_fd); +} + +static void open_logical(void) +{ + int fd; + + fd = open(LOGICAL_DISK_NAME, O_RDWR); + basic_test(fd >= 0); + close(fd); +} + +static void close_logical(void) +{ + int fd, rc; + + fd = open(LOGICAL_DISK_NAME, O_RDWR); + rc = close(fd); + basic_test(rc == 0); +} + +static void use_after_close_invalid(void) +{ + int fd, val; + ssize_t n; + + fd = open(LOGICAL_DISK_NAME, O_RDWR); + close(fd); + n = read(fd, &val, sizeof(val)); + basic_test(n 
< 0); +} + +static void lseek_logical(void) +{ + off_t offset; + + init_test(); + offset = lseek(log_fd, LOGICAL_DISK_SIZE / 2, SEEK_SET); + basic_test(offset == LOGICAL_DISK_SIZE / 2); + cleanup_test(); +} + +static void read_one_sector_start(void) +{ + ssize_t n; + + init_test(); + n = log_read_start(ONE_SECTOR); + basic_test(n == ONE_SECTOR); + cleanup_test(); +} + +static void read_one_sector_middle(void) +{ + ssize_t n; + + init_test(); + n = log_read_middle(ONE_SECTOR); + basic_test(n == ONE_SECTOR); + cleanup_test(); +} + +static void read_one_sector_end(void) +{ + ssize_t n; + + init_test(); + n = log_read_end(ONE_SECTOR); + basic_test(n == ONE_SECTOR); + cleanup_test(); +} + +static void write_one_sector_start(void) +{ + ssize_t n; + + init_test(); + n = log_write_start(ONE_SECTOR); + basic_test(n == ONE_SECTOR); + cleanup_test(); +} + +static void write_one_sector_middle(void) +{ + ssize_t n; + + init_test(); + n = log_write_middle(ONE_SECTOR); + basic_test(n == ONE_SECTOR); + cleanup_test(); +} + +static void write_one_sector_end(void) +{ + ssize_t n; + + init_test(); + n = log_write_end(ONE_SECTOR); + basic_test(n == ONE_SECTOR); + cleanup_test(); +} + +static void read_one_page_start(void) +{ + ssize_t n; + + init_test(); + n = log_read_start(ONE_PAGE); + basic_test(n == ONE_PAGE); + cleanup_test(); +} + +static void read_one_page_middle(void) +{ + ssize_t n; + + init_test(); + n = log_read_middle(ONE_PAGE); + basic_test(n == ONE_PAGE); + cleanup_test(); +} + +static void read_one_page_end(void) +{ + ssize_t n; + + init_test(); + n = log_read_end(ONE_PAGE); + basic_test(n == ONE_PAGE); + cleanup_test(); +} + +static void write_one_page_start(void) +{ + ssize_t n; + + init_test(); + n = log_write_start(ONE_PAGE); + basic_test(n == ONE_PAGE); + cleanup_test(); +} + +static void write_one_page_middle(void) +{ + ssize_t n; + + init_test(); + n = log_write_middle(ONE_PAGE); + basic_test(n == ONE_PAGE); + cleanup_test(); +} + +static void 
write_one_page_end(void) +{ + ssize_t n; + + init_test(); + n = log_write_end(ONE_PAGE); + basic_test(n == ONE_PAGE); + cleanup_test(); +} + +static void read_two_pages_start(void) +{ + ssize_t n; + + init_test(); + n = log_read_start(TWO_PAGES); + basic_test(n == TWO_PAGES); + cleanup_test(); +} + +static void read_two_pages_middle(void) +{ + ssize_t n; + + init_test(); + n = log_read_middle(TWO_PAGES); + basic_test(n == TWO_PAGES); + cleanup_test(); +} + +static void read_two_pages_end(void) +{ + ssize_t n; + + init_test(); + n = log_read_end(TWO_PAGES); + basic_test(n == TWO_PAGES); + cleanup_test(); +} + +static void write_two_pages_start(void) +{ + ssize_t n; + + init_test(); + n = log_write_start(TWO_PAGES); + basic_test(n == TWO_PAGES); + cleanup_test(); +} + +static void write_two_pages_middle(void) +{ + ssize_t n; + + init_test(); + n = log_write_middle(TWO_PAGES); + basic_test(n == TWO_PAGES); + cleanup_test(); +} + +static void write_two_pages_end(void) +{ + ssize_t n; + + init_test(); + n = log_write_end(TWO_PAGES); + basic_test(n == TWO_PAGES); + cleanup_test(); +} + +static void read_one_meg_start(void) +{ + ssize_t n; + + init_test(); + n = log_read_start(ONE_MEG); + basic_test(n == ONE_MEG); + cleanup_test(); +} + +static void read_one_meg_middle(void) +{ + ssize_t n; + + init_test(); + n = log_read_middle(ONE_MEG); + basic_test(n == ONE_MEG); + cleanup_test(); +} + +static void read_one_meg_end(void) +{ + ssize_t n; + + init_test(); + n = log_read_end(ONE_MEG); + basic_test(n == ONE_MEG); + cleanup_test(); +} + +static void write_one_meg_start(void) +{ + ssize_t n; + + init_test(); + n = log_write_start(ONE_MEG); + basic_test(n == ONE_MEG); + cleanup_test(); +} + +static void write_one_meg_middle(void) +{ + ssize_t n; + + init_test(); + n = log_write_middle(ONE_MEG); + basic_test(n == ONE_MEG); + cleanup_test(); +} + +static void write_one_meg_end(void) +{ + ssize_t n; + + init_test(); + n = log_write_end(ONE_MEG); + basic_test(n == ONE_MEG); + 
cleanup_test(); +} + +static void read_boundary_one_sector(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xread(log_fd, log_rd_buf, ONE_SECTOR); + basic_test(n == 0); + cleanup_test(); +} + +static void read_boundary_one_page(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xread(log_fd, log_rd_buf, ONE_PAGE); + basic_test(n == 0); + cleanup_test(); +} + +static void read_boundary_two_pages(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xread(log_fd, log_rd_buf, TWO_PAGES); + basic_test(n == 0); + cleanup_test(); +} + +static void read_boundary_one_meg(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xread(log_fd, log_rd_buf, ONE_MEG); + basic_test(n == 0); + cleanup_test(); +} + +static void write_boundary_one_sector(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xwrite(log_fd, log_wr_buf, ONE_SECTOR); /* write from the write buffer, like the other write_boundary_* tests */ + basic_test(n < 0); + cleanup_test(); +} + +static void write_boundary_one_page(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xwrite(log_fd, log_wr_buf, ONE_PAGE); + basic_test(n < 0); + cleanup_test(); +} + +static void write_boundary_two_pages(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xwrite(log_fd, log_wr_buf, TWO_PAGES); + basic_test(n < 0); + cleanup_test(); +} + +static void write_boundary_one_meg(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xwrite(log_fd, log_wr_buf, ONE_MEG); + basic_test(n < 0); + cleanup_test(); +} + +static size_t get_free_memory(void) +{ + FILE *f; + size_t i; + char buf[256]; + char *p; + + f = fopen("/proc/meminfo", "rt"); + assert(f != NULL); + /* Second line is 'MemFree: ...' 
*/ + if (fgets(buf, sizeof(buf), f) == NULL || fgets(buf, sizeof(buf), f) == NULL) + buf[0] = '\0'; /* fewer than two lines could be read: leave an empty string so the scan below finds nothing */ + fclose(f); + + p = NULL; + for (i = 0; buf[i] != '\0'; i++) /* stop at the terminator instead of scanning leftover bytes up to index 255 */ + if (buf[i] == ':') { + p = buf+i+1; + break; + } + assert(p != NULL); /* no "name: value" line was read; fail loudly instead of passing NULL to strtoul() */ + return strtoul(p, NULL, 10); +} + +static void memory_is_freed(void) +{ + size_t mem_used_before, mem_used_after; + size_t i; + + init_test(); + mem_used_before = get_free_memory(); + for (i = 0; i < 5; i++) + log_write_start(ONE_MEG); + mem_used_after = get_free_memory(); + + /* We assume 3MB (3072KB) is a reasonable memory usage in writes. */ + basic_test(mem_used_after < mem_used_before + 3072 && + mem_used_before < mem_used_after + 3072); + cleanup_test(); +} + +static void write_one_sector_check_phys1(void) +{ + int rc; + size_t len = ONE_SECTOR; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc = cmp_data_log_wr_phys1_rd(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_one_page_check_phys1(void) +{ + int rc; + size_t len = ONE_PAGE; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc = cmp_data_log_wr_phys1_rd(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_two_pages_check_phys1(void) +{ + int rc; + size_t len = TWO_PAGES; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc = cmp_data_log_wr_phys1_rd(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_one_meg_check_phys1(void) +{ + int rc; + size_t len = ONE_MEG; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc = cmp_data_log_wr_phys1_rd(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_one_sector_check_phys(void) +{ + int rc1, rc2; + size_t len = ONE_SECTOR; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = 
cmp_data_log_wr_phys1_rd(len); + rc2 = cmp_data_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void write_one_page_check_phys(void) +{ + int rc1, rc2; + size_t len = ONE_PAGE; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_data_log_wr_phys1_rd(len); + rc2 = cmp_data_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void write_two_pages_check_phys(void) +{ + int rc1, rc2; + size_t len = TWO_PAGES; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_data_log_wr_phys1_rd(len); + rc2 = cmp_data_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void write_one_meg_check_phys(void) +{ + int rc1, rc2; + size_t len = ONE_MEG; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_data_log_wr_phys1_rd(len); + rc2 = cmp_data_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void read_one_sector_after_write(void) +{ + int rc; + size_t len = ONE_SECTOR; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc = cmp_data_log_rd_phys1_wr(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void read_one_page_after_write(void) +{ + int rc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc = cmp_data_log_rd_phys1_wr(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void read_two_pages_after_write(void) +{ + int rc; + size_t len = TWO_PAGES; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + 
phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc = cmp_data_log_rd_phys1_wr(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void read_one_meg_after_write(void) +{ + int rc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc = cmp_data_log_rd_phys1_wr(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_one_sector_check_phys1_crc(void) +{ + int rc; + size_t len = ONE_SECTOR; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc = cmp_crc_log_wr_phys1_rd(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_one_page_check_phys1_crc(void) +{ + int rc; + size_t len = ONE_PAGE; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc = cmp_crc_log_wr_phys1_rd(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_two_pages_check_phys1_crc(void) +{ + int rc; + size_t len = TWO_PAGES; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc = cmp_crc_log_wr_phys1_rd(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_one_meg_check_phys1_crc(void) +{ + int rc; + size_t len = ONE_MEG; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc = cmp_crc_log_wr_phys1_rd(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_one_sector_check_phys_crc(void) +{ + int rc1, rc2; + size_t len = ONE_SECTOR; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_crc_log_wr_phys1_rd(len); + rc2 = cmp_crc_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void 
write_one_page_check_phys_crc(void) +{ + int rc1, rc2; + size_t len = ONE_PAGE; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_crc_log_wr_phys1_rd(len); + rc2 = cmp_crc_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void write_two_pages_check_phys_crc(void) +{ + int rc1, rc2; + size_t len = TWO_PAGES; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_crc_log_wr_phys1_rd(len); + rc2 = cmp_crc_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void write_one_meg_check_phys_crc(void) +{ + int rc1, rc2; + size_t len = ONE_MEG; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_crc_log_wr_phys1_rd(len); + rc2 = cmp_crc_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void corrupt_read_correct_one_sector_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_SECTOR; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, 1); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys2_wr(len); + rc_crc = cmp_crc_log_rd_phys2_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_one_sector_in_page_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, 1); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys2_wr(len); + rc_crc = cmp_crc_log_rd_phys2_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_one_page_disk1(void) +{ + int rc_data, 
rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, ONE_PAGE / ONE_SECTOR); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys2_wr(len); + rc_crc = cmp_crc_log_rd_phys2_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_ten_pages_in_one_meg_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, TEN_PAGES / ONE_SECTOR); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys2_wr(len); + rc_crc = cmp_crc_log_rd_phys2_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_one_meg_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, ONE_MEG / ONE_SECTOR); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys2_wr(len); + rc_crc = cmp_crc_log_rd_phys2_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_sector_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_SECTOR; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, 1); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc_data = cmp_data_log_rd_phys1_rd(len); + rc_crc = cmp_crc_log_rd_phys1_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_sector_in_page_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, 1); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys1_read_start(len); + 
rc_data = cmp_data_log_rd_phys1_rd(len); + rc_crc = cmp_crc_log_rd_phys1_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_page_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, ONE_PAGE / ONE_SECTOR); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc_data = cmp_data_log_rd_phys1_rd(len); + rc_crc = cmp_crc_log_rd_phys1_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_ten_page_in_one_meg_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, TEN_PAGES / ONE_SECTOR); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc_data = cmp_data_log_rd_phys1_rd(len); + rc_crc = cmp_crc_log_rd_phys1_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_meg_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, ONE_MEG / ONE_SECTOR); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc_data = cmp_data_log_rd_phys1_rd(len); + rc_crc = cmp_crc_log_rd_phys1_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_one_sector_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_SECTOR; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, 1); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys1_wr(len); + rc_crc = cmp_crc_log_rd_phys1_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void 
corrupt_read_correct_one_sector_in_page_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, 1); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys1_wr(len); + rc_crc = cmp_crc_log_rd_phys1_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_one_page_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, ONE_PAGE / ONE_SECTOR); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys1_wr(len); + rc_crc = cmp_crc_log_rd_phys1_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_ten_pages_in_one_meg_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, TEN_PAGES / ONE_SECTOR); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys1_wr(len); + rc_crc = cmp_crc_log_rd_phys1_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + flush_disk_buffers(); + cleanup_test(); +} + +static void corrupt_read_correct_one_meg_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, ONE_MEG / ONE_SECTOR); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys1_wr(len); + rc_crc = cmp_crc_log_rd_phys1_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_sector_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_SECTOR; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, 1); + flush_disk_buffers(); + log_read_start(len); + 
flush_disk_buffers(); + phys2_read_start(len); + rc_data = cmp_data_log_rd_phys2_rd(len); + rc_crc = cmp_crc_log_rd_phys2_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_sector_in_page_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, 1); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys2_read_start(len); + rc_data = cmp_data_log_rd_phys2_rd(len); + rc_crc = cmp_crc_log_rd_phys2_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_page_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, ONE_PAGE / ONE_SECTOR); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys2_read_start(len); + rc_data = cmp_data_log_rd_phys2_rd(len); + rc_crc = cmp_crc_log_rd_phys2_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_ten_page_in_one_meg_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, TEN_PAGES / ONE_SECTOR); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys2_read_start(len); + rc_data = cmp_data_log_rd_phys2_rd(len); + rc_crc = cmp_crc_log_rd_phys2_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_meg_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, ONE_MEG / ONE_SECTOR); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys2_read_start(len); + rc_data = cmp_data_log_rd_phys2_rd(len); + rc_crc = 
cmp_crc_log_rd_phys2_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void dual_error(void) +{ + ssize_t n; + size_t len = ONE_SECTOR; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, 1); + phys2_corrupt_and_write_start(len, 1); + flush_disk_buffers(); + n = log_read_start(len); + basic_test(n <= 0); + cleanup_test(); +} + +struct run_test_t test_array[] = { + { open_logical, "open(" LOGICAL_DISK_NAME ")", 4 }, + { close_logical, "close(" LOGICAL_DISK_NAME ")", 4 }, + { use_after_close_invalid, "use after close is invalid", 4 }, + { lseek_logical, "lseek(" LOGICAL_DISK_NAME ")", 4 }, + { read_one_sector_start, "read one sector from the start", 5 }, + { read_one_sector_middle, "read one sector from the middle", 5 }, + { read_one_sector_end, "read one sector from the end", 5 }, + { write_one_sector_start, "write one sector from the start", 5 }, + { write_one_sector_middle, "write one sector from the middle", 5 }, + { write_one_sector_end, "write one sector from the end", 5 }, + { read_one_page_start, "read one page from the start", 5 }, + { read_one_page_middle, "read one page from the middle", 5 }, + { read_one_page_end, "read one page from the end", 5 }, + { write_one_page_start, "write one page from the start", 5 }, + { write_one_page_middle, "write one page from the middle", 5 }, + { write_one_page_end, "write one page from the end", 5 }, + { read_two_pages_start, "read two pages from the start", 5 }, + { read_two_pages_middle, "read two pages from the middle", 5 }, + { read_two_pages_end, "read two pages from the end", 5 }, + { write_two_pages_start, "write two pages from the start", 5 }, + { write_two_pages_middle, "write two pages from the middle", 5 }, + { write_two_pages_end, "write two pages from the end", 5 }, + { read_one_meg_start, "read 1MB from the start", 5 }, + { read_one_meg_middle, "read 1MB from the middle", 5 }, + { read_one_meg_end, "read 1MB from the end", 5 }, + { 
write_one_meg_start, "write 1MB from the start", 5 }, + { write_one_meg_middle, "write 1MB from the middle", 5 }, + { write_one_meg_end, "write 1MB from the end", 5 }, + { read_boundary_one_sector, "read one sector outside boundary", 7 }, + { read_boundary_one_page, "read one page with contents outside boundary", 7 }, + { read_boundary_two_pages, "read two pages with contents outside boundary", 7 }, + { read_boundary_one_meg, "read 1MB with contents outside boundary", 7 }, + { write_boundary_one_sector, "write one sector outside boundary", 7 }, + { write_boundary_one_page, "write one page with contents outside boundary", 7 }, + { write_boundary_two_pages, "write two pages with contents outside boundary", 7 }, + { write_boundary_one_meg, "write 1MB with contents outside boundary", 7 }, + { memory_is_freed, "check memory is freed", 24 }, + { write_one_sector_check_phys1, "write one sector and check disk1 (no CRC check)", 15 }, + { write_one_page_check_phys1, "write one page and check disk1 (no CRC check)", 15 }, + { write_two_pages_check_phys1, "write two pages and check disk1 (no CRC check)", 15 }, + { write_one_meg_check_phys1, "write 1MB and check disk1 (no CRC check)", 15 }, + { write_one_sector_check_phys, "write one sector and check disks (no CRC check)", 15 }, + { write_one_page_check_phys, "write one page and check disks (no CRC check)", 15 }, + { write_two_pages_check_phys, "write two pages and check disks (no CRC check)", 15 }, + { write_one_meg_check_phys, "write 1MB and check disks (no CRC check)", 15 }, + { read_one_sector_after_write, "read one sector after physical write (correct CRC)", 16 }, + { read_one_page_after_write, "read one page after physical write (correct CRC)", 16 }, + { read_two_pages_after_write, "read two pages after physical write (correct CRC)", 16 }, + { read_one_meg_after_write, "read 1MB after physical write (correct CRC)", 16 }, + { write_one_sector_check_phys1_crc, "write one sector and check disk1 (do CRC check)", 16 }, + { 
write_one_page_check_phys1_crc, "write one page and check disk1 (do CRC check)", 16 }, + { write_two_pages_check_phys1_crc, "write two pages and check disk1 (do CRC check)", 16 }, + { write_one_meg_check_phys1_crc, "write 1MB and check disk1 (do CRC check)", 16 }, + { write_one_sector_check_phys_crc, "write one sector and check disks (do CRC check)", 16 }, + { write_one_page_check_phys_crc, "write one page and check disks (do CRC check)", 16 }, + { write_two_pages_check_phys_crc, "write two pages and check disks (do CRC check)", 16 }, + { write_one_meg_check_phys_crc, "write 1MB and check disks (do CRC check)", 16 }, + { corrupt_read_correct_one_sector_disk1, "read corrected one sector error from disk1", 18 }, + { corrupt_read_correct_one_sector_in_page_disk1, "read corrected one sector in page error from disk1", 18 }, + { corrupt_read_correct_one_page_disk1, "read corrected one page error from disk1", 18 }, + { corrupt_read_correct_ten_pages_in_one_meg_disk1, "read corrected ten pages error in one meg from disk1", 18 }, + { corrupt_read_correct_one_meg_disk1, "read corrected one meg error from disk1", 18 }, + { recover_one_sector_disk1, "recover one sector error from disk1", 18 }, + { recover_one_sector_in_page_disk1, "recover one sector error in one page from disk1", 18 }, + { recover_one_page_disk1, "recover one page filled with errors from disk1", 18 }, + { recover_ten_page_in_one_meg_disk1, "recover ten pages error in 1MB from disk1", 18 }, + { recover_one_meg_disk1, "recover 1MB filled with errors from disk1", 18 }, + { corrupt_read_correct_one_sector_disk2, "read corrected one sector error from disk2", 18 }, + { corrupt_read_correct_one_sector_in_page_disk2, "read corrected one sector in page error from disk2", 18 }, + { corrupt_read_correct_one_page_disk2, "read corrected one page error from disk2", 18 }, + { corrupt_read_correct_ten_pages_in_one_meg_disk2, "read corrected ten pages error in one meg from disk2", 18 }, + { corrupt_read_correct_one_meg_disk2, 
"read corrected one meg error from disk2", 18 }, + { recover_one_sector_disk2, "recover one sector error from disk2", 18 }, + { recover_one_sector_in_page_disk2, "recover one sector error in one page from disk2", 18 }, + { recover_one_page_disk2, "recover one page filled with errors from disk2", 18 }, + { recover_ten_page_in_one_meg_disk2, "recover ten pages error in 1MB from disk2", 18 }, + { recover_one_meg_disk2, "recover 1MB filled with errors from disk2", 18 }, + { dual_error, "signal error when both physical disks are corrupted", 12 }, +}; +size_t max_points = 900; + +/* Return number of tests in test_array. */ +size_t get_num_tests(void) +{ + return sizeof(test_array) / sizeof(test_array[0]); +} diff --git a/tools/labs/templates/assignments/3-raid/ssr.h b/tools/labs/templates/assignments/3-raid/ssr.h new file mode 100644 index 00000000000000..cc68b7d56d1720 --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/ssr.h @@ -0,0 +1,26 @@ +/* + * Simple Software Raid - Linux header file + */ + +#ifndef SSR_H_ +#define SSR_H_ 1 + +#define SSR_MAJOR 240 +#define SSR_FIRST_MINOR 0 +#define SSR_NUM_MINORS 1 + +#define PHYSICAL_DISK1_NAME "/dev/vdb" +#define PHYSICAL_DISK2_NAME "/dev/vdc" + +/* sector size */ +#define KERNEL_SECTOR_SIZE 512 + +/* physical partition size - 95 MB (more than this results in error) */ +#define LOGICAL_DISK_NAME "/dev/ssr" +#define LOGICAL_DISK_SIZE (95 * 1024 * 1024) +#define LOGICAL_DISK_SECTORS ((LOGICAL_DISK_SIZE) / (KERNEL_SECTOR_SIZE)) + +/* sync data */ +#define SSR_IOCTL_SYNC 1 + +#endif diff --git a/tools/labs/templates/assignments/4-stp/Kbuild b/tools/labs/templates/assignments/4-stp/Kbuild new file mode 100644 index 00000000000000..feb82305e7f94a --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g + +obj-m = af_stp.o diff --git a/tools/labs/templates/assignments/4-stp/checker/.gitignore b/tools/labs/templates/assignments/4-stp/checker/.gitignore new file mode 100644 
index 00000000000000..d4b87c0f1dff43 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/.gitignore @@ -0,0 +1 @@ +/stp_test diff --git a/tools/labs/templates/assignments/4-stp/checker/Makefile b/tools/labs/templates/assignments/4-stp/checker/Makefile new file mode 100644 index 00000000000000..e0c2b42807f22f --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/Makefile @@ -0,0 +1,17 @@ +objects = _test/stp_test.o + +.PHONY: all clean _test_subdir_all _test_subdir_clean + +all: stp_test + +stp_test: _test_subdir_all $(objects) + $(CC) -Wall -g -m32 -static $(objects) -Wl,--whole-archive -lpthread -Wl,--no-whole-archive -o $@ + +_test_subdir_all: + $(MAKE) -C _test + +clean: _test_subdir_clean + -rm -f stp_test *~ + +_test_subdir_clean: + $(MAKE) -C _test clean diff --git a/tools/labs/templates/assignments/4-stp/checker/README b/tools/labs/templates/assignments/4-stp/checker/README new file mode 100644 index 00000000000000..a1d04d84f8653e --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/README @@ -0,0 +1,87 @@ +== STP TEST SUITE == + +Test suite for SO2 Transport Protocol + +== FILES == + +README + * this file + +Makefile + +_checker + * script to run all tests defined in _test/stp_test.c + +_test/Makefile + * test suite internal Makefile (creates necessary object files) + +_test/stp_test.c + * test suite for SO2 Transport Protocol + +_test/stp_test.h + * test suite header file + +_test/stp.h + * SO2 Transport Protocol header file (macros and structures) + +_test/test.h + * useful macros for testing + +_test/debug.h + * debugging macros + +_test/util.h + * useful macros for generic use (error processing) + +== BUILDING == + + +== RUNNING == + +Copy your af_stp.ko module and _checker and stp_test +to the fsimg/root directory on your QEMU/KVM virtual machine. + +In order to run the test suite you can either use the _checker +script or run the stp_test executable. 
+ +The _checker script runs all tests and computes the assignment grade: + + ./_checker + +In order to run a specific test, pass the test number (1 .. 32) to the +stp_test executable. + + ./stp_test 5 + +== TESTS == + +Tests are basically unit tests. A single function in the test_fun_array (see +stp_test.c) is called each time the stp_test executable is invoked, +testing a single functionality (and assuming previous tests have been run and +passed). + +The EXIT_IF_FAIL macro (see test.h) is unnecessary since after each test, the +program completes. + +Each test function follows the unit test pattern: initialization, action, +evaluation. The test macro (see test.h) is invoked at the end of each test +for evaluating and grading the test. + +== DEBUGGING == + +The debug.h header file consists of several macros useful for debugging +(dprintf, dlog). There are multiple uses of these macros throughout the above +files. + +In order to turn debug messages on, you must define the DEBUG macro, either in +a header file, or, I suggest, in the Makefile. The LOG_LEVEL macro limits the +log message types that are to be printed, by default LOG_WARNING (see enum in +debug.h). You may redefine it in a header file or in the Makefile. + +Rapid enabling of debug messages is achieved by uncommenting the CPPFLAGS +line in _test/Makefile. It turns on debugging and enables all log messages +(LOG_DEBUG). + +== OTHER == + +srand48() and drand48() are used for generating random numbers. 
diff --git a/tools/labs/templates/assignments/4-stp/checker/_checker b/tools/labs/templates/assignments/4-stp/checker/_checker new file mode 100755 index 00000000000000..a2a588a66df0fa --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_checker @@ -0,0 +1,24 @@ +#!/bin/sh + +first_test=1 +last_test=32 +executable=stp_test + +for i in $(seq $first_test $last_test); do + ./"$executable" $i +done | tee results.txt + +cat results.txt | grep '\[.*\]$' | awk -F '[] /[]+' ' +BEGIN { + sum=0 +} + +{ + sum += $2; +} + +END { + printf "\n%66s [%03d/100]\n", "Total:", sum; +}' + +rm -f results.txt diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/Makefile b/tools/labs/templates/assignments/4-stp/checker/_test/Makefile new file mode 100644 index 00000000000000..d5074dd464a5f5 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_test/Makefile @@ -0,0 +1,11 @@ +#CPPFLAGS = -DDEBUG -DLOG_LEVEL=LOG_DEBUG +CFLAGS = -Wall -g -m32 + +.PHONY: all clean + +all: stp_test.o + +stp_test.o: stp_test.c stp_test.h stp.h test.h util.h debug.h + +clean: + -rm -f *~ *.o diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/debug.h b/tools/labs/templates/assignments/4-stp/checker/_test/debug.h new file mode 100644 index 00000000000000..a54e9622647181 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_test/debug.h @@ -0,0 +1,77 @@ +/* + * debugging macros + * heavily inspired by previous work and Internet resources + * + * uses C99 variadic macros + * uses non-standard usage of the token-paste operator (##) for + * removing the comma symbol (,) when not followed by a token + * uses non-standard __FUNCTION__ macro (MSVC doesn't support __func__) + * tested on gcc 4.4.5 and Visual Studio 2008 (9.0), compiler version 15.00 + * + * Razvan Deaconescu, razvan.deaconescu@cs.pub.ro + */ + +#ifndef DEBUG_H_ +#define DEBUG_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> + +/* log levels */ +enum { + LOG_EMERG = 1, + 
LOG_ALERT, + LOG_CRIT, + LOG_ERR, + LOG_WARNING, + LOG_NOTICE, + LOG_INFO, + LOG_DEBUG +}; + +/* + * initialize default loglevel (for dlog) + * may be redefined in the including code + */ + +#ifndef LOG_LEVEL +#define LOG_LEVEL LOG_WARNING +#endif + +/* + * define DEBUG macro as a compiler option: + * -DDEBUG for GCC + * /DDEBUG for MSVC + */ + +#if defined DEBUG +#define dprintf(format, ...) \ + fprintf(stderr, " [%s(), %s:%u] " format, \ + __FUNCTION__, __FILE__, __LINE__, \ + ##__VA_ARGS__) +#else +#define dprintf(format, ...) \ + do { \ + } while (0) +#endif + +#if defined DEBUG +#define dlog(level, format, ...) \ + do { \ + if ((level) <= LOG_LEVEL) \ + dprintf(format, ##__VA_ARGS__); \ + } while (0) +#else +#define dlog(level, format, ...) \ + do { \ + } while (0) +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/stp.h b/tools/labs/templates/assignments/4-stp/checker/_test/stp.h new file mode 100644 index 00000000000000..838f9936bf55c5 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_test/stp.h @@ -0,0 +1,51 @@ +/* + * SO2 Transport Protocol + */ + +#ifndef STP_H_ +#define STP_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include <linux/types.h> + +/* STP reuses the defines of ancient protocols like Econet and Xerox PUP + * because adding a new protocol would involve patching the kernel, which we + * don't want to do and besides that, they are probably not used anymore. 
+ */ +#define AF_STP 19 +#define PF_STP AF_STP +#define ETH_P_STP 0x0a00 + +struct stp_hdr { + __be16 dst; /* Destination port */ + __be16 src; /* Source port */ + __be16 len; /* Total length, including header */ + __u8 flags; /* */ + __u8 csum; /* xor of all bytes, including header */ +}; + +struct sockaddr_stp { + unsigned short sas_family; /* Always AF_STP */ + int sas_ifindex; /* Interface index */ + __be16 sas_port; /* Port */ + __u8 sas_addr[6]; /* MAC address */ +}; + +/* STP protocol name; used as identifier in /proc/net/protocols */ +#define STP_PROTO_NAME "STP" + +/* + * STP uses proc interface to communicate statistical information to + * user space (in /proc/net/). + */ +#define STP_PROC_NET_FILENAME "stp_stats" +#define STP_PROC_FULL_FILENAME "/proc/net/" STP_PROC_NET_FILENAME + +#ifdef __cplusplus +} +#endif + +#endif /* STP_H_ */ diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/stp_test.c b/tools/labs/templates/assignments/4-stp/checker/_test/stp_test.c new file mode 100644 index 00000000000000..d6c729e344e634 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_test/stp_test.c @@ -0,0 +1,1331 @@ +/* + * SO2 Transport Protocol - test suite + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test.h" +#include "debug.h" +#include "util.h" + +#include "stp.h" +#include "stp_test.h" + +#define SSA struct sockaddr +#define BUFLEN 32 + +/* declared in test.h; used for printing information in test macro */ +int max_points = 100; + +/* values read from STP_PROC_FULL_FILENAME */ +static int rx_pkts, hdr_err, csum_err, no_sock, no_buffs, tx_pkts; + +enum socket_action { + ACTION_SENDTO, + ACTION_SENDMSG, + ACTION_SEND, + ACTION_SENDTO_PING_PONG, + ACTION_SENDMSG_PING_PONG, + ACTION_SEND_PING_PONG, +}; + +/* + * Do initialization for STP test functions. 
+ */ + +static void init_test(void) +{ + system("insmod " MODULE_FILENAME); +} + +/* + * Do cleanup for STP test functions. + */ + +static void cleanup_test(void) +{ + system("rmmod " MODULE_NAME); +} + +/* + * Check for successful module insertion and removal from the kernel. + */ + +static void test_insmod_rmmod(void) +{ + int rc; + + rc = system("insmod " MODULE_FILENAME); + test("test_insmod", rc == 0, 1); + + rc = system("rmmod " MODULE_NAME); + test("test_rmmod", rc == 0, 1); + + rc = system("insmod " MODULE_FILENAME); + test(__FUNCTION__, rc == 0, 1); + + system("rmmod " MODULE_NAME); +} + +/* + * Check /proc/net/protocols for STP protocol. Grep for line starting with + * the string identified by STP_PROTO_NAME. + */ + +static void test_proto_name_exists_after_insmod(void) +{ + int rc; + + init_test(); + + rc = system("grep '^" STP_PROTO_NAME "' /proc/net/protocols > /dev/null 2>&1"); + test(__FUNCTION__, rc == 0, 2); + + cleanup_test(); +} + +/* + * STP entry in /proc/net/protocols is deleted when module is removed. + */ + +static void test_proto_name_inexistent_after_rmmod(void) +{ + int rc; + + init_test(); + cleanup_test(); + + rc = system("grep '^" STP_PROTO_NAME "' /proc/net/protocols > /dev/null 2>&1"); + test(__FUNCTION__, rc != 0, 2); +} + +/* + * Check for proc entry for STP statistics. + */ + +static void test_proc_entry_exists_after_insmod(void) +{ + int rc; + + init_test(); + + rc = access(STP_PROC_FULL_FILENAME, F_OK); + test(__FUNCTION__, rc == 0, 2); + + cleanup_test(); +} + +/* + * STP statistics file in /proc/net/ is deleted when module is removed. + */ + +static void test_proc_entry_inexistent_after_rmmod(void) +{ + int rc; + + init_test(); + cleanup_test(); + + rc = access(STP_PROC_FULL_FILENAME, F_OK); + test(__FUNCTION__, rc != 0, 2); +} + +/* + * Call socket(2) with proper arguments for creating an AF_STP socket. 
+ */ + +static void test_socket(void) +{ + int s; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + test(__FUNCTION__, s >= 0, 5); + + close(s); + cleanup_test(); +} + +/* + * Create two AF_STP sockets using socket(2). + */ + +static void test_two_sockets(void) +{ + int s1, s2; + + init_test(); + + s1 = socket(AF_STP, SOCK_DGRAM, 0); + s2 = socket(AF_STP, SOCK_DGRAM, 0); + test(__FUNCTION__, s1 >= 0 && s2 >= 0 && s1 != s2, 2); + + close(s1); + close(s2); + cleanup_test(); +} + +/* + * Pass bad socket type argument to socket(2) (second argument). + * Call should fail. + */ + +static void test_socket_bad_socket_type(void) +{ + int s; + + init_test(); + + s = socket(AF_STP, SOCK_STREAM, 0); + test(__FUNCTION__, s < 0, 1); + + close(s); + cleanup_test(); +} + +/* + * Pass bad protocol argument to socket(2) (third argument). + * Call should fail. + */ + +static void test_socket_bad_protocol(void) +{ + int s; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, IPPROTO_TCP); + test(__FUNCTION__, s < 0, 1); + + close(s); + cleanup_test(); +} + +/* + * Close open socket using close(2). + */ + +static void test_close(void) +{ + int s; + int rc; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + rc = close(s); + test(__FUNCTION__, rc == 0, 2); + + cleanup_test(); +} + +/* + * Pass closed socket descriptor to close(2). Call should fail. + */ + +static void test_close_closed_socket(void) +{ + int s; + int rc; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + close(s); + rc = close(s); + + test(__FUNCTION__, s >= 0 && rc < 0, 2); + + cleanup_test(); +} + +/* + * Bind socket to proper address. Use "all" interface. 
+ */ + +static void test_bind(void) +{ + int s; + int rc; + struct sockaddr_stp sas; + const unsigned short port = 12345; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + sas.sas_family = AF_STP; + sas.sas_port = htons(port); + sas.sas_ifindex = 0; + rc = bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp)); + + test(__FUNCTION__, rc == 0, 5); + + close(s); + cleanup_test(); +} + +/* + * Bind socket to proper address. Use "eth0" interface. + */ + +static void test_bind_eth0(void) +{ + int s; + int rc; + struct sockaddr_stp sas; + const unsigned short port = 12345; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + sas.sas_family = AF_STP; + sas.sas_port = htons(port); + sas.sas_ifindex = if_nametoindex("eth0"); + rc = bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp)); + + test(__FUNCTION__, rc == 0, 2); + + close(s); + cleanup_test(); +} + +/* + * Use bind(2) on two AF_STP sockets. + */ + +static void test_two_binds(void) +{ + int s1, s2; + int rc1, rc2; + struct sockaddr_stp sas1, sas2; + const unsigned short port1 = 12345, port2 = 54321; + + init_test(); + + s1 = socket(AF_STP, SOCK_DGRAM, 0); + + sas1.sas_family = AF_STP; + sas1.sas_port = htons(port1); + sas1.sas_ifindex = 0; + rc1 = bind(s1, (struct sockaddr *) &sas1, sizeof(struct sockaddr_stp)); + + s2 = socket(AF_STP, SOCK_DGRAM, 0); + + sas2.sas_family = AF_STP; + sas2.sas_port = htons(port2); + sas2.sas_ifindex = 0; + rc2 = bind(s2, (struct sockaddr *) &sas2, sizeof(struct sockaddr_stp)); + + test(__FUNCTION__, rc1 == 0 && rc2 == 0, 2); + + close(s1); close(s2); + cleanup_test(); +} + +/* + * Pass bad address to bind(2) (second argument). + * Call should fail. 
+ */
+
+static void test_bind_bad_address(void)
+{
+	const unsigned short port = 12345;
+	struct sockaddr_stp addr;
+	int fd;
+	int ret;
+
+	init_test();
+
+	fd = socket(AF_STP, SOCK_DGRAM, 0);
+
+	/* AF_INET is deliberately the wrong family for an AF_STP socket */
+	addr.sas_ifindex = 0;
+	addr.sas_port = htons(port);
+	addr.sas_family = AF_INET;
+	ret = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
+
+	/* the kernel is expected to reject the address */
+	test(__FUNCTION__, ret != 0, 1);
+
+	close(fd);
+	cleanup_test();
+}
+
+/*
+ * Two AF_STP sockets try to bind the same port on the "all" interface
+ * (ifindex 0). The first bind must succeed, the second one must fail.
+ */
+
+static void test_two_binds_same_if(void)
+{
+	const unsigned short port = 12345;
+	struct sockaddr_stp first_addr, second_addr;
+	int first_fd, second_fd;
+	int first_ret, second_ret;
+
+	init_test();
+
+	/* both sockets ask for exactly the same (port, interface) pair */
+	first_addr.sas_family = AF_STP;
+	first_addr.sas_ifindex = 0;
+	first_addr.sas_port = htons(port);
+
+	second_addr.sas_family = AF_STP;
+	second_addr.sas_ifindex = 0;
+	second_addr.sas_port = htons(port);
+
+	first_fd = socket(AF_STP, SOCK_DGRAM, 0);
+	first_ret = bind(first_fd, (struct sockaddr *) &first_addr,
+			sizeof(first_addr));
+
+	second_fd = socket(AF_STP, SOCK_DGRAM, 0);
+	second_ret = bind(second_fd, (struct sockaddr *) &second_addr,
+			sizeof(second_addr));
+
+	test(__FUNCTION__, first_ret == 0 && second_ret < 0, 2);
+
+	close(first_fd);
+	close(second_fd);
+	cleanup_test();
+}
+
+/*
+ * Use bind(2) on two AF_STP sockets using same port and same interface.
+ * Call should fail.
+ */
+
+static void test_two_binds_same_if_eth0(void)
+{
+	const unsigned short port = 12345;
+	struct sockaddr_stp first_addr, second_addr;
+	int first_fd, second_fd;
+	int first_ret, second_ret;
+
+	init_test();
+
+	/* first socket claims the port on eth0 */
+	first_fd = socket(AF_STP, SOCK_DGRAM, 0);
+
+	first_addr.sas_ifindex = if_nametoindex("eth0");
+	first_addr.sas_port = htons(port);
+	first_addr.sas_family = AF_STP;
+	first_ret = bind(first_fd, (struct sockaddr *) &first_addr,
+			sizeof(first_addr));
+
+	/* second socket asks for the very same port and interface */
+	second_fd = socket(AF_STP, SOCK_DGRAM, 0);
+
+	second_addr.sas_ifindex = if_nametoindex("eth0");
+	second_addr.sas_port = htons(port);
+	second_addr.sas_family = AF_STP;
+	second_ret = bind(second_fd, (struct sockaddr *) &second_addr,
+			sizeof(second_addr));
+
+	test(__FUNCTION__, first_ret == 0 && second_ret < 0, 2);
+
+	close(first_fd);
+	close(second_fd);
+	cleanup_test();
+}
+
+/*
+ * Bind the same port twice: first on the "all" interface (ifindex 0),
+ * then on "eth0". The second bind must be refused.
+ */
+
+static void test_two_binds_same_if_all_eth0(void)
+{
+	const unsigned short port = 12345;
+	struct sockaddr_stp any_addr, eth_addr;
+	int any_fd, eth_fd;
+	int any_ret, eth_ret;
+
+	init_test();
+
+	/* wildcard bind goes first */
+	any_fd = socket(AF_STP, SOCK_DGRAM, 0);
+
+	any_addr.sas_ifindex = 0;
+	any_addr.sas_port = htons(port);
+	any_addr.sas_family = AF_STP;
+	any_ret = bind(any_fd, (struct sockaddr *) &any_addr,
+			sizeof(any_addr));
+
+	/* interface-specific bind on the same port comes second */
+	eth_fd = socket(AF_STP, SOCK_DGRAM, 0);
+
+	eth_addr.sas_ifindex = if_nametoindex("eth0");
+	eth_addr.sas_port = htons(port);
+	eth_addr.sas_family = AF_STP;
+	eth_ret = bind(eth_fd, (struct sockaddr *) &eth_addr,
+			sizeof(eth_addr));
+
+	test(__FUNCTION__, any_ret == 0 && eth_ret < 0, 2);
+
+	close(any_fd);
+	close(eth_fd);
+	cleanup_test();
+}
+
+/*
+ * Use bind(2) on two AF_STP sockets using same port and "eth0" interface and
+ * "all".
+ * Call should fail.
+ */
+
+static void test_two_binds_same_if_eth0_all(void)
+{
+	const unsigned short port = 12345;
+	struct sockaddr_stp eth_addr, any_addr;
+	int eth_fd, any_fd;
+	int eth_ret, any_ret;
+
+	init_test();
+
+	/* interface-specific bind goes first */
+	eth_fd = socket(AF_STP, SOCK_DGRAM, 0);
+
+	eth_addr.sas_ifindex = if_nametoindex("eth0");
+	eth_addr.sas_port = htons(port);
+	eth_addr.sas_family = AF_STP;
+	eth_ret = bind(eth_fd, (struct sockaddr *) &eth_addr,
+			sizeof(eth_addr));
+
+	/* wildcard bind on the same port comes second */
+	any_fd = socket(AF_STP, SOCK_DGRAM, 0);
+
+	any_addr.sas_ifindex = 0;
+	any_addr.sas_port = htons(port);
+	any_addr.sas_family = AF_STP;
+	any_ret = bind(any_fd, (struct sockaddr *) &any_addr,
+			sizeof(any_addr));
+
+	test(__FUNCTION__, eth_ret == 0 && any_ret < 0, 2);
+
+	close(eth_fd);
+	close(any_fd);
+	cleanup_test();
+}
+
+/* Send len bytes from buf to the address sas using sendto(2). */
+static ssize_t sendto_message(int sockfd, struct sockaddr_stp *sas,
+		char *buf, size_t len)
+{
+	socklen_t addr_len = sizeof(*sas);
+
+	return sendto(sockfd, buf, len, 0, (SSA *) sas, addr_len);
+}
+
+/* Send len bytes from buf to the address sas using sendmsg(2). */
+static ssize_t sendmsg_message(int sockfd, struct sockaddr_stp *sas,
+		char *buf, size_t len)
+{
+	struct msghdr hdr;
+	struct iovec vec;
+
+	/* a single buffer carries the whole payload */
+	vec.iov_len = len;
+	vec.iov_base = buf;
+
+	hdr.msg_iov = &vec;
+	hdr.msg_iovlen = 1;
+	hdr.msg_name = sas;
+	hdr.msg_namelen = sizeof(*sas);
+	/* no ancillary data, no flags */
+	hdr.msg_controllen = 0;
+	hdr.msg_control = NULL;
+	hdr.msg_flags = 0;
+
+	return sendmsg(sockfd, &hdr, 0);
+}
+
+/* Send len bytes from buf with send(2); no destination address is passed. */
+static ssize_t send_message(int sockfd, char *buf, size_t len)
+{
+	ssize_t sent = send(sockfd, buf, len, 0);
+
+	return sent;
+}
+
+/*
+ * Receive one message with recvfrom(2). The source address is not
+ * requested (both address arguments are NULL).
+ */
+
+static ssize_t recvfrom_message(int sockfd, char *buf, size_t len)
+{
+	ssize_t received;
+
+	dprintf("ready to receive using recvfrom\n");
+	received = recvfrom(sockfd, buf, len, 0, NULL, NULL);
+
+	return received;
+}
+
+/*
+ * Use recvmsg(2) to receive message. We don't care what is the source
+ * address of the message.
+ */
+
+static ssize_t recvmsg_message(int sockfd, char *buf, size_t len)
+{
+	struct msghdr hdr;
+	struct iovec vec;
+
+	/* a single buffer receives the whole payload */
+	vec.iov_len = len;
+	vec.iov_base = buf;
+
+	hdr.msg_iov = &vec;
+	hdr.msg_iovlen = 1;
+	/* the sender address is not requested */
+	hdr.msg_namelen = 0;
+	hdr.msg_name = NULL;
+	/* no ancillary data, no flags */
+	hdr.msg_controllen = 0;
+	hdr.msg_control = NULL;
+	hdr.msg_flags = 0;
+
+	return recvmsg(sockfd, &hdr, 0);
+}
+
+/*
+ * Receive one message with recv(2).
+ */
+
+static ssize_t recv_message(int sockfd, char *buf, size_t len)
+{
+	ssize_t received;
+
+	dprintf("ready to receive using recv\n");
+	received = recv(sockfd, buf, len, 0);
+
+	return received;
+}
+
+/*
+ * Send one datagram with sendto(2) from a socket bound on "lo".
+ * The test passes when sendto(2) does not report an error.
+ */
+
+static void test_sendto(void)
+{
+	const unsigned short port = 12345, remote_port = 54321;
+	char payload[BUFLEN] = DEFAULT_SENDER_MESSAGE;
+	struct sockaddr_stp local, remote;
+	int fd;
+	int ret;
+
+	init_test();
+
+	fd = socket(AF_STP, SOCK_DGRAM, 0);
+
+	/* local endpoint: loopback interface */
+	local.sas_ifindex = if_nametoindex("lo");
+	local.sas_port = htons(port);
+	local.sas_family = AF_STP;
+	bind(fd, (struct sockaddr *) &local, sizeof(local));
+
+	/* remote endpoint: all-zero MAC address, any interface */
+	remote.sas_ifindex = 0;
+	remote.sas_port = htons(remote_port);
+	remote.sas_family = AF_STP;
+	memcpy(remote.sas_addr, ether_aton("00:00:00:00:00:00"),
+			sizeof(remote.sas_addr));
+
+	ret = sendto_message(fd, &remote, payload, BUFLEN);
+
+	test(__FUNCTION__, ret >= 0, 5);
+
+	close(fd);
+	cleanup_test();
+}
+
+/*
+ * Use sendmsg(2) on a socket.
+ */
+
+static void test_sendmsg(void)
+{
+	const unsigned short port = 12345, remote_port = 54321;
+	char payload[BUFLEN] = DEFAULT_SENDER_MESSAGE;
+	struct sockaddr_stp local, remote;
+	int fd;
+	int ret;
+
+	init_test();
+
+	fd = socket(AF_STP, SOCK_DGRAM, 0);
+
+	/* local endpoint: loopback interface */
+	local.sas_ifindex = if_nametoindex("lo");
+	local.sas_port = htons(port);
+	local.sas_family = AF_STP;
+	bind(fd, (struct sockaddr *) &local, sizeof(local));
+
+	/* remote endpoint: all-zero MAC address, any interface */
+	remote.sas_ifindex = 0;
+	remote.sas_port = htons(remote_port);
+	remote.sas_family = AF_STP;
+	memcpy(remote.sas_addr, ether_aton("00:00:00:00:00:00"),
+			sizeof(remote.sas_addr));
+
+	ret = sendmsg_message(fd, &remote, payload, BUFLEN);
+
+	test(__FUNCTION__, ret >= 0, 3);
+
+	close(fd);
+	cleanup_test();
+}
+
+/*
+ * Call connect(2) on a bound AF_STP socket, naming a remote AF_STP
+ * address. The test passes when connect(2) does not report an error.
+ */
+
+static void test_connect(void)
+{
+	const unsigned short port = 12345, remote_port = 54321;
+	struct sockaddr_stp local, remote;
+	int fd;
+	int ret;
+
+	init_test();
+
+	fd = socket(AF_STP, SOCK_DGRAM, 0);
+
+	/* local endpoint: loopback interface */
+	local.sas_ifindex = if_nametoindex("lo");
+	local.sas_port = htons(port);
+	local.sas_family = AF_STP;
+	bind(fd, (struct sockaddr *) &local, sizeof(local));
+
+	/* remote endpoint: all-zero MAC address, any interface */
+	remote.sas_ifindex = 0;
+	remote.sas_port = htons(remote_port);
+	remote.sas_family = AF_STP;
+	memcpy(remote.sas_addr, ether_aton("00:00:00:00:00:00"),
+			sizeof(remote.sas_addr));
+
+	ret = connect(fd, (struct sockaddr *) &remote, sizeof(remote));
+
+	test(__FUNCTION__, ret >= 0, 5);
+
+	close(fd);
+	cleanup_test();
+}
+
+/*
+ * Use send(2) on a connected socket.
+ */
+
+static void test_send(void)
+{
+	int s;
+	int rc;
+	struct sockaddr_stp sas, remote_sas;
+	const unsigned short port = 12345, remote_port = 54321;
+	char bufout[BUFLEN] = DEFAULT_SENDER_MESSAGE;
+
+	init_test();
+
+	s = socket(AF_STP, SOCK_DGRAM, 0);
+
+	sas.sas_family = AF_STP;
+	sas.sas_port = htons(port);
+	sas.sas_ifindex = if_nametoindex("lo");
+	bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp));
+
+	remote_sas.sas_family = AF_STP;
+	remote_sas.sas_port = htons(remote_port);
+	remote_sas.sas_ifindex = 0;
+	memcpy(remote_sas.sas_addr, ether_aton("00:00:00:00:00:00"),
+			sizeof(remote_sas.sas_addr));
+
+	/* send(2) carries no destination, so the socket is connected first */
+	rc = connect(s, (SSA *) &remote_sas, sizeof(remote_sas));
+	assert(rc == 0);
+
+	rc = send_message(s, bufout, BUFLEN);
+
+	test(__FUNCTION__, rc >= 0, 5);
+
+	close(s);
+	cleanup_test();
+}
+
+/*
+ * Read values from STP_PROC_FULL_FILENAME.
+ *
+ * The first line (column names) is skipped; the second line must hold six
+ * integers, stored in rx_pkts, hdr_err, csum_err, no_sock, no_buffs and
+ * tx_pkts.
+ *
+ * Returns 0 on success, -1 when the file cannot be opened or does not
+ * contain the header line followed by all six values.
+ */
+
+static int stp_proc_read_values(void)
+{
+	char buffer[256];
+	FILE *f;
+	int nread;
+
+	f = fopen(STP_PROC_FULL_FILENAME, "rt");
+	if (f == NULL)
+		return -1;
+
+	/* read column line */
+	if (fgets(buffer, sizeof(buffer), f) == NULL) {
+		fclose(f);
+		return -1;
+	}
+
+	/* read values line */
+	nread = fscanf(f, "%d %d %d %d %d %d",
+			&rx_pkts, &hdr_err, &csum_err, &no_sock, &no_buffs, &tx_pkts);
+
+	fclose(f);
+
+	/* a short or malformed line leaves stale counters: report it */
+	if (nread != 6)
+		return -1;
+
+	dprintf("read: %d %d %d %d %d %d\n",
+			rx_pkts, hdr_err, csum_err, no_sock, no_buffs, tx_pkts);
+
+	return 0;
+}
+
+/*
+ * Sending a packet updates the TxPkts column in STP_PROC_FULL_FILENAME.
+ * Expected value is 1.
+ */
+
+static void test_stat_tx(void)
+{
+	int s;
+	int rc;
+	struct sockaddr_stp sas, remote_sas;
+	const unsigned short port = 12345, remote_port = 54321;
+	char bufout[BUFLEN] = DEFAULT_SENDER_MESSAGE;
+
+	init_test();
+
+	s = socket(AF_STP, SOCK_DGRAM, 0);
+
+	sas.sas_family = AF_STP;
+	sas.sas_port = htons(port);
+	sas.sas_ifindex = if_nametoindex("lo");
+	bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp));
+
+	remote_sas.sas_family = AF_STP;
+	remote_sas.sas_port = htons(remote_port);
+	remote_sas.sas_ifindex = 0;
+	memcpy(remote_sas.sas_addr, ether_aton("00:00:00:00:00:00"),
+			sizeof(remote_sas.sas_addr));
+
+	rc = connect(s, (SSA *) &remote_sas, sizeof(remote_sas));
+	assert(rc == 0);
+
+	send_message(s, bufout, BUFLEN);
+
+	close(s);
+
+	/* an unreadable stats file must fail the test, not be ignored */
+	rc = stp_proc_read_values();
+
+	test(__FUNCTION__, rc == 0 && tx_pkts == 1, 3);
+
+	cleanup_test();
+}
+
+/*
+ * Start sender process.
+ *
+ * action switches between sendto(2), sendmsg(2), send(2) and whether
+ * to do ping_pong or not.
+ *
+ * The parent gets the child's pid back. The child never returns: it sends
+ * DEFAULT_SENDER_MESSAGE, waits for the reply in the ping_pong variants,
+ * posts SEM_NAME_SENDER and exits with EXIT_SUCCESS or EXIT_FAILURE.
+ */
+
+static pid_t start_sender(enum socket_action action)
+{
+	pid_t pid;
+	int s;
+	struct sockaddr_stp sas, remote_sas;
+	const unsigned short port = 12345, remote_port = 54321;
+	char bufin[BUFLEN];
+	char bufout[BUFLEN] = DEFAULT_SENDER_MESSAGE;
+	ssize_t bytes_recv = 0, bytes_sent = 0;
+	sem_t *sem;
+
+	/* set bufin to 0 for testing purposes (it should be overwritten) */
+	memset(bufin, 0, BUFLEN);
+
+	pid = fork();
+	DIE(pid < 0, "fork");
+
+	switch (pid) {
+	case 0:		/* child process */
+		break;
+
+	default:	/* parent process */
+		return pid;
+	}
+
+	/* only child process (sender) is running */
+
+	/* the parent created the semaphore; open it without O_CREAT */
+	sem = sem_open(SEM_NAME_SENDER, 0);
+	if (sem == SEM_FAILED)
+		exit(EXIT_FAILURE);
+
+	s = socket(AF_STP, SOCK_DGRAM, 0);
+
+	sas.sas_family = AF_STP;
+	sas.sas_port = htons(port);
+	sas.sas_ifindex = if_nametoindex("lo");
+	bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp));
+
+	remote_sas.sas_family = AF_STP;
+	remote_sas.sas_port = htons(remote_port);
+	remote_sas.sas_ifindex = 0;
+	memcpy(remote_sas.sas_addr, ether_aton("00:00:00:00:00:00"),
+			sizeof(remote_sas.sas_addr));
+	/* send(2)/recv(2) carry no address, so those variants connect first */
+	if (action == ACTION_SEND || action == ACTION_SEND_PING_PONG) {
+		int rc;
+
+		rc = connect(s, (SSA *) &remote_sas, sizeof(remote_sas));
+		assert(rc == 0);
+	}
+
+	/* step 1: send the message with the call selected by action */
+	switch (action) {
+	case ACTION_SENDTO:
+	case ACTION_SENDTO_PING_PONG:
+		bytes_sent = sendto_message(s, &remote_sas, bufout, BUFLEN);
+		dprintf("sent %s\n", bufout);
+		break;
+
+	case ACTION_SENDMSG:
+	case ACTION_SENDMSG_PING_PONG:
+		bytes_sent = sendmsg_message(s, &remote_sas, bufout, BUFLEN);
+		dprintf("sent %s\n", bufout);
+		break;
+
+	case ACTION_SEND:
+	case ACTION_SEND_PING_PONG:
+		bytes_sent = send_message(s, bufout, BUFLEN);
+		dprintf("sent %s\n", bufout);
+		break;
+
+	default:
+		break;
+	}
+
+	/* step 2 (ping_pong only): wait for the receiver's reply */
+	switch (action) {
+	case ACTION_SENDTO_PING_PONG:
+		bytes_recv = recvfrom_message(s, bufin, BUFLEN);
+		dprintf("received %s\n", bufin);
+		break;
+	case ACTION_SENDMSG_PING_PONG:
+		bytes_recv = recvmsg_message(s, bufin, BUFLEN);
+		dprintf("received %s\n", bufin);
+		break;
+	case ACTION_SEND_PING_PONG:
+		bytes_recv = recv_message(s, bufin, BUFLEN);
+		dprintf("received %s\n", bufin);
+		break;
+	default:
+		break;
+	}
+
+	/* Let the parent know we're done. */
+	sem_post(sem);
+
+	/* exit with EXIT_SUCCESS in case of successful communication */
+	switch (action) {
+	case ACTION_SENDTO:
+	case ACTION_SEND:
+	case ACTION_SENDMSG:
+		if (bytes_sent > 0)
+			exit(EXIT_SUCCESS);
+		break;
+
+	case ACTION_SENDMSG_PING_PONG:
+	case ACTION_SENDTO_PING_PONG:
+	case ACTION_SEND_PING_PONG:
+		/* ssize_t needs %zd; %d is only right where ssize_t is int */
+		dprintf("(ping_pong) bytes_sent: %zd, bytes_recv: %zd, strcmp: %d\n",
+				bytes_sent, bytes_recv, strcmp(bufin, bufout));
+		dprintf("bufin: #%s#, bufout: #%s#\n", bufin, bufout);
+		if (bytes_sent > 0 && bytes_recv > 0 &&
+				strcmp(bufin, DEFAULT_RECEIVER_MESSAGE) == 0)
+			exit(EXIT_SUCCESS);
+		break;
+	}
+
+	exit(EXIT_FAILURE);
+
+	/* is not reached */
+	return 0;
+}
+
+/*
+ * Start receiver process.
+ *
+ * action switches between sendto(2), sendmsg(2), send(2) and whether
+ * to do ping_pong or not.
+ *
+ * The parent gets the child's pid back. The child never returns: it posts
+ * SEM_NAME_RECEIVER once after setup and once when done, and exits with
+ * EXIT_SUCCESS or EXIT_FAILURE.
+ */
+
+static pid_t start_receiver(enum socket_action action)
+{
+	pid_t pid;
+	int s;
+	struct sockaddr_stp sas, remote_sas;
+	const unsigned short port = 54321, remote_port = 12345;
+	char bufin[BUFLEN];
+	char bufout[BUFLEN] = DEFAULT_RECEIVER_MESSAGE;
+	ssize_t bytes_recv = 0, bytes_sent = 0;
+	sem_t *sem;
+
+	/* set bufin to 0 for testing purposes (it should be overwritten) */
+	memset(bufin, 0, BUFLEN);
+
+	pid = fork();
+	DIE(pid < 0, "fork");
+
+	switch (pid) {
+	case 0:		/* child process */
+		break;
+
+	default:	/* parent process */
+		return pid;
+	}
+
+	/* only child process (receiver) is running */
+
+	/* the parent created the semaphore; open it without O_CREAT */
+	sem = sem_open(SEM_NAME_RECEIVER, 0);
+	if (sem == SEM_FAILED)
+		exit(EXIT_FAILURE);
+
+	s = socket(AF_STP, SOCK_DGRAM, 0);
+
+	sas.sas_family = AF_STP;
+	sas.sas_port = htons(port);
+	sas.sas_ifindex = if_nametoindex("lo");
+	bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp));
+
+	remote_sas.sas_family = AF_STP;
+	remote_sas.sas_port = htons(remote_port);
+	remote_sas.sas_ifindex = 0;
+	memcpy(remote_sas.sas_addr, ether_aton("00:00:00:00:00:00"),
+			sizeof(remote_sas.sas_addr));
+
+	/* send(2)/recv(2) carry no address, so those variants connect first */
+	if (action == ACTION_SEND || action == ACTION_SEND_PING_PONG) {
+		int rc;
+
+		rc = connect(s, (SSA *) &remote_sas, sizeof(remote_sas));
+		assert(rc == 0);
+		dprintf("connected\n");
+	}
+
+	/* We're set up, let the parent know. */
+	sem_post(sem);
+
+	/* step 1: receive the message with the call selected by action */
+	switch (action) {
+	case ACTION_SENDTO:
+	case ACTION_SENDTO_PING_PONG:
+		bytes_recv = recvfrom_message(s, bufin, BUFLEN);
+		dprintf("received %s\n", bufin);
+		break;
+
+	case ACTION_SENDMSG:
+	case ACTION_SENDMSG_PING_PONG:
+		bytes_recv = recvmsg_message(s, bufin, BUFLEN);
+		dprintf("received %s\n", bufin);
+		break;
+
+	case ACTION_SEND:
+	case ACTION_SEND_PING_PONG:
+		bytes_recv = recv_message(s, bufin, BUFLEN);
+		dprintf("received %s\n", bufin);
+		break;
+
+	default:
+		break;
+	}
+
+	/* step 2 (ping_pong only): answer with DEFAULT_RECEIVER_MESSAGE */
+	switch (action) {
+	case ACTION_SENDTO_PING_PONG:
+		bytes_sent = sendto_message(s, &remote_sas, bufout, BUFLEN);
+		dprintf("sent %s\n", bufout);
+		break;
+	case ACTION_SENDMSG_PING_PONG:
+		bytes_sent = sendmsg_message(s, &remote_sas, bufout, BUFLEN);
+		dprintf("sent %s\n", bufout);
+		break;
+	case ACTION_SEND_PING_PONG:
+		bytes_sent = send_message(s, bufout, BUFLEN);
+		dprintf("sent %s\n", bufout);
+		break;
+	default:
+		break;
+	}
+
+	/* Let the parent know we're done. */
+	sem_post(sem);
+
+	/* exit with EXIT_SUCCESS in case of successful communication */
+	switch (action) {
+	case ACTION_SENDTO:
+	case ACTION_SEND:
+	case ACTION_SENDMSG:
+		if (bytes_recv > 0)
+			exit(EXIT_SUCCESS);
+		break;
+
+	case ACTION_SENDMSG_PING_PONG:
+	case ACTION_SENDTO_PING_PONG:
+	case ACTION_SEND_PING_PONG:
+		/* ssize_t needs %zd; %d is only right where ssize_t is int */
+		dprintf("(ping_pong) bytes_sent: %zd, bytes_recv: %zd\n",
+				bytes_sent, bytes_recv);
+		dprintf("bufin: #%s#, bufout: #%s#\n", bufin, bufout);
+		if (bytes_recv > 0 && bytes_sent > 0 &&
+				strcmp(bufin, DEFAULT_SENDER_MESSAGE) == 0)
+			exit(EXIT_SUCCESS);
+		break;
+	}
+
+	exit(EXIT_FAILURE);
+
+	/* is not reached */
+	return 0;
+}
+
+/*
+ * Wait on sem for at most secs seconds.
+ *
+ * sem_timedwait(3) takes an absolute CLOCK_REALTIME deadline, so the
+ * timeout is added to the current time. Returns the sem_timedwait()
+ * result: 0 when the semaphore was taken, -1 otherwise.
+ */
+int wait_for_semaphore(sem_t *sem, unsigned int secs)
+{
+	struct timespec ts;
+	int ret;
+
+	ret = clock_gettime(CLOCK_REALTIME, &ts);
+	assert(ret == 0);
+
+	ts.tv_sec += secs;
+
+	ret = sem_timedwait(sem, &ts);
+	return ret;
+}
+
+/*
+ * Wrapper call for running a sender and a receiver process.
+ *
+ * action switches between sendto(2), sendmsg(2), send(2) and whether
+ * to do ping_pong or not.
+ *
+ * Returns boolean value: 1 in case of successful run, 0 otherwise.
+ */
+
+static int run_sender_receiver(enum socket_action action)
+{
+	pid_t pid_r = 0, pid_s = 0;
+	int rc1, rc2, ret;
+	/* initialized: on timeout a child may be killed before it reports */
+	int status1 = 0, status2 = 0;
+	sem_t *sem_r, *sem_s;
+
+	/*
+	 * A named semaphore left behind by an aborted run keeps its old
+	 * count; remove leftovers so both semaphores really start at 0.
+	 */
+	sem_unlink(SEM_NAME_RECEIVER);
+	sem_unlink(SEM_NAME_SENDER);
+
+	/* Create two named semaphores used to communicate
+	 * with the child processes
+	 */
+	sem_r = sem_open(SEM_NAME_RECEIVER, O_CREAT, (mode_t)0644, 0);
+	assert(sem_r != SEM_FAILED);
+	sem_s = sem_open(SEM_NAME_SENDER, O_CREAT, (mode_t)0644, 0);
+	assert(sem_s != SEM_FAILED);
+
+	/* start the receiver */
+	pid_r = start_receiver(action);
+	assert(pid_r > 0);
+	/* wait for it to bind */
+	wait_for_semaphore(sem_r, RECV_TIMEOUT);
+
+	/* Receiver is set up, start the sender now. */
+	pid_s = start_sender(action);
+	assert(pid_s > 0);
+
+	/* Wait for both to finish. */
+	rc1 = wait_for_semaphore(sem_r, SENDRECV_TIMEOUT);
+	ret = waitpid(pid_r, &status1, rc1 ? WNOHANG : 0);
+	assert(ret >= 0);
+	if (ret == 0) {
+		/* still running after the timeout: kill it and reap it */
+		kill(pid_r, SIGKILL);
+		waitpid(pid_r, &status1, 0);
+	}
+
+	rc2 = wait_for_semaphore(sem_s, SENDRECV_TIMEOUT);
+	ret = waitpid(pid_s, &status2, rc2 ? WNOHANG : 0);
+	assert(ret >= 0);
+	if (ret == 0) {
+		/* still running after the timeout: kill it and reap it */
+		kill(pid_s, SIGKILL);
+		waitpid(pid_s, &status2, 0);
+	}
+
+	sem_close(sem_r); sem_unlink(SEM_NAME_RECEIVER);
+	sem_close(sem_s); sem_unlink(SEM_NAME_SENDER);
+
+	/* success needs both "done" posts in time and both clean exits */
+	return !rc1 && !rc2 &&
+		WIFEXITED(status1) && WEXITSTATUS(status1) == EXIT_SUCCESS &&
+		WIFEXITED(status2) && WEXITSTATUS(status2) == EXIT_SUCCESS;
+}
+
+/*
+ * Send a datagram on one end and receive it on the other end.
+ * Use sendto(2) and recvfrom(2).
+ */
+
+static void test_sendto_recvfrom(void)
+{
+	int rc;
+
+	init_test();
+
+	rc = run_sender_receiver(ACTION_SENDTO);
+
+	test(__FUNCTION__, rc != 0, 10);
+
+	cleanup_test();
+}
+
+/*
+ * Send and receive packet updates RxPkts and TxPkts columns in
+ * STP_PROC_FULL_FILENAME. Expected values are 1, 1.
+ */
+
+static void test_stat_tx_rx(void)
+{
+	int rc;
+
+	init_test();
+
+	/* only the counters matter here; the exchange result is not checked */
+	run_sender_receiver(ACTION_SENDTO);
+
+	rc = stp_proc_read_values();
+
+	test(__FUNCTION__, rc == 0 && tx_pkts == 1 && rx_pkts == 1, 3);
+
+	cleanup_test();
+}
+
+/*
+ * Send a packet and then wait for a reply.
+ * Use sendto(2) and recvfrom(2).
+ */
+
+static void test_sendto_recvfrom_ping_pong(void)
+{
+	int rc;
+
+	init_test();
+
+	rc = run_sender_receiver(ACTION_SENDTO_PING_PONG);
+
+	test(__FUNCTION__, rc != 0, 5);
+
+	cleanup_test();
+}
+
+/*
+ * Send and receive ping pong updates RxPkts and TxPkts column in
+ * STP_PROC_FULL_FILENAME. Expected values are 2, 2.
+ */
+
+static void test_stat_tx_rx_ping_pong(void)
+{
+	int rc;
+
+	init_test();
+
+	/* only the counters matter here; the exchange result is not checked */
+	run_sender_receiver(ACTION_SENDTO_PING_PONG);
+
+	/* one read is enough: a second one only re-reads the same file */
+	rc = stp_proc_read_values();
+
+	test(__FUNCTION__, rc == 0 && tx_pkts == 2 && rx_pkts == 2, 3);
+
+	cleanup_test();
+}
+
+/*
+ * Send a datagram on one end and receive it on the other end.
+ * Use sendmsg(2) and recvmsg(2).
+ */
+
+static void test_sendmsg_recvmsg(void)
+{
+	int rc;
+
+	init_test();
+
+	rc = run_sender_receiver(ACTION_SENDMSG);
+
+	test(__FUNCTION__, rc != 0, 5);
+
+	cleanup_test();
+}
+
+/*
+ * Send a packet and then wait for a reply.
+ * Use sendmsg(2) and recvmsg(2).
+ */
+
+static void test_sendmsg_recvmsg_ping_pong(void)
+{
+	int rc;
+
+	init_test();
+
+	rc = run_sender_receiver(ACTION_SENDMSG_PING_PONG);
+
+	test(__FUNCTION__, rc != 0, 3);
+
+	cleanup_test();
+}
+
+/*
+ * Send a packet on one end and receive it on the other end.
+ * Use send(2) and recv(2).
+ */
+
+static void test_send_receive(void)
+{
+	int rc;
+
+	init_test();
+
+	rc = run_sender_receiver(ACTION_SEND);
+
+	test(__FUNCTION__, rc != 0, 5);
+
+	cleanup_test();
+}
+
+/*
+ * Send a packet and then wait for a reply.
+ */
+
+static void test_send_receive_ping_pong(void)
+{
+	int exchange_ok;
+
+	init_test();
+
+	/* non-zero means both processes reported a successful exchange */
+	exchange_ok = run_sender_receiver(ACTION_SEND_PING_PONG);
+
+	test(__FUNCTION__, exchange_ok != 0, 3);
+
+	cleanup_test();
+}
+
+/*
+ * Test table, indexed by the test number given on the command line.
+ * Slot 0 is a placeholder because test numbers start at 1.
+ */
+static void (*test_fun_array[])(void) = {
+	NULL,
+	/* 1-5: module load/unload, protocol and proc entries */
+	test_insmod_rmmod,
+	test_proto_name_exists_after_insmod,
+	test_proto_name_inexistent_after_rmmod,
+	test_proc_entry_exists_after_insmod,
+	test_proc_entry_inexistent_after_rmmod,
+	/* 6-11: socket(2) and close(2) */
+	test_socket,
+	test_two_sockets,
+	test_socket_bad_socket_type,
+	test_socket_bad_protocol,
+	test_close,
+	test_close_closed_socket,
+	/* 12-19: bind(2) */
+	test_bind,
+	test_bind_eth0,
+	test_two_binds,
+	test_bind_bad_address,
+	test_two_binds_same_if,
+	test_two_binds_same_if_eth0,
+	test_two_binds_same_if_all_eth0,
+	test_two_binds_same_if_eth0_all,
+	/* 20-24: sending from a single process */
+	test_sendto,
+	test_sendmsg,
+	test_connect,
+	test_send,
+	test_stat_tx,
+	/* 25-32: sender/receiver process pairs */
+	test_sendto_recvfrom,
+	test_stat_tx_rx,
+	test_sendto_recvfrom_ping_pong,
+	test_stat_tx_rx_ping_pong,
+	test_sendmsg_recvmsg,
+	test_sendmsg_recvmsg_ping_pong,
+	test_send_receive,
+	test_send_receive_ping_pong,
+};
+
+/*
+ * Usage message for invalid executable call.
+ */
+
+static void usage(const char *argv0)
+{
+	fprintf(stderr, "Usage: %s test_no\n\n", argv0);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Run the single test whose number is given as the only argument.
+ * Valid numbers are 1 .. (number of entries in test_fun_array - 1).
+ */
+int main(int argc, char **argv)
+{
+	const long n_tests = sizeof(test_fun_array) / sizeof(test_fun_array[0]);
+	char *end;
+	long test_idx;
+
+	if (argc != 2)
+		usage(argv[0]);
+
+	/* strtol, unlike atoi, lets us reject "abc" and "12abc" */
+	test_idx = strtol(argv[1], &end, 10);
+
+	if (end == argv[1] || *end != '\0' ||
+			test_idx < 1 || test_idx >= n_tests) {
+		fprintf(stderr, "Error: test index %s is out of bounds\n",
+				argv[1]);
+		exit(EXIT_FAILURE);
+	}
+
+	srand(time(NULL));
+	srand48(time(NULL));
+	test_fun_array[test_idx]();
+
+	return 0;
+}
diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/stp_test.h b/tools/labs/templates/assignments/4-stp/checker/_test/stp_test.h
new file mode 100644
index 00000000000000..fb708433c0269f
--- /dev/null
+++ b/tools/labs/templates/assignments/4-stp/checker/_test/stp_test.h
@@ -0,0 +1,31 @@
+/*
+ * SO2 Transport Protocol - test suite specific header
+ */
+
+#ifndef STP_TEST_H_
+#define STP_TEST_H_ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* STP test suite macros and structures */
+#define MODULE_NAME "af_stp"
+#define MODULE_FILENAME MODULE_NAME ".ko"
+
+#define SEM_NAME_RECEIVER "/receiver_sem"
+#define SEM_NAME_SENDER "/sender_sem"
+
+/* timeouts waiting for receiver/sender */
+#define RECV_TIMEOUT 1
+#define SENDRECV_TIMEOUT 3
+
+/* messages used for "ping-pong" between sender and receiver */
+#define DEFAULT_SENDER_MESSAGE "You called down the thunder"
+#define DEFAULT_RECEIVER_MESSAGE "now reap the whirlwind"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/test.h b/tools/labs/templates/assignments/4-stp/checker/_test/test.h
new file mode 100644
index 00000000000000..4bcafad9c7d0f5
--- /dev/null
+++ b/tools/labs/templates/assignments/4-stp/checker/_test/test.h
@@ -0,0 +1,63 @@
+/*
+ * generic test suite
+ *
+ * test macros and headers
+ */
+
+#ifndef TEST_H_
+#define TEST_H_ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+
+/* to be defined by calling
program */
+extern int max_points;
+
+/*
+ * uncomment EXIT_IF_FAIL macro in order to stop test execution
+ * at first failed test
+ */
+
+/*#define EXIT_IF_FAIL 1*/
+
+#if defined(EXIT_IF_FAIL)
+#define test_do_fail(points) \
+	do { \
+		printf("failed\n"); \
+		exit(EXIT_FAILURE); \
+	} while (0)
+#else
+#define test_do_fail(points) \
+	printf("failed [ 0/%3d]\n", max_points)
+#endif
+
+#define test_do_pass(points) \
+	printf("passed [%3d/%3d]\n", points, max_points)
+
+/*
+ * Print message, pad it with dots up to column 60, then print the verdict
+ * for the boolean expression test, worth the given number of points.
+ *
+ * The padding loop counts up from strlen(message): computing
+ * 60 - strlen(message) in size_t wraps around for messages longer than
+ * 60 characters and would print dots almost forever.
+ */
+#define test(message, test, points) \
+	do { \
+		size_t i; \
+		int t = (test); \
+		\
+		printf("%s", message); \
+		fflush(stdout); \
+		\
+		for (i = strlen(message); i < 60; i++) \
+			putchar('.'); \
+		\
+		if (!t) \
+			test_do_fail(points); \
+		else \
+			test_do_pass(points); \
+		\
+		fflush(stdout); \
+	} while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/util.h b/tools/labs/templates/assignments/4-stp/checker/_test/util.h
new file mode 100644
index 00000000000000..f06cb833b99635
--- /dev/null
+++ b/tools/labs/templates/assignments/4-stp/checker/_test/util.h
@@ -0,0 +1,69 @@
+/*
+ * useful structures/macros
+ */
+
+#ifndef UTIL_H_
+#define UTIL_H_ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(_WIN32)
+
+#include <windows.h>
+
+static VOID PrintLastError(const PCHAR message)
+{
+	CHAR errBuff[1024];
+
+	FormatMessage(
+		FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_MAX_WIDTH_MASK,
+		NULL,
+		GetLastError(),
+		0,
+		errBuff,
+		sizeof(errBuff) - 1,
+		NULL);
+
+	fprintf(stderr, "%s: %s\n", message, errBuff);
+}
+
+#define ERR(call_description) \
+	do { \
+		fprintf(stderr, "(%s, %d): ", \
+				__FILE__, __LINE__); \
+		PrintLastError(call_description); \
+	} while (0)
+
+#elif defined(__linux__)
+
+/* error printing macro */
+#define ERR(call_description) \
+	do { \
+		fprintf(stderr, "(%s, %d): ", \
+				__FILE__, __LINE__); \
+		perror(call_description); \
+	} while (0)
+
+#else
+  #error "Unknown platform"
+#endif
+
+/*
print error (call ERR) and exit */
+#define DIE(assertion, call_description) \
+	do { \
+		if (assertion) { \
+			ERR(call_description); \
+			exit(EXIT_FAILURE); \
+		} \
+	} while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/labs/templates/assignments/4-stp/stp.h b/tools/labs/templates/assignments/4-stp/stp.h
new file mode 100644
index 00000000000000..838f9936bf55c5
--- /dev/null
+++ b/tools/labs/templates/assignments/4-stp/stp.h
@@ -0,0 +1,51 @@
+/*
+ * SO2 Transport Protocol
+ */
+
+#ifndef STP_H_
+#define STP_H_ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/types.h>
+
+/* STP reuses the defines of ancient protocols like Econet and Xerox PUP
+ * because adding a new protocol would involve patching the kernel, which we
+ * don't want to do and besides that, they are probably not used anymore.
+ */
+#define AF_STP 19
+#define PF_STP AF_STP
+#define ETH_P_STP 0x0a00
+
+struct stp_hdr {
+	__be16 dst;	/* Destination port */
+	__be16 src;	/* Source port */
+	__be16 len;	/* Total length, including header */
+	__u8 flags;	/* Flags (no values are defined in this header) */
+	__u8 csum;	/* xor of all bytes, including header */
+};
+
+struct sockaddr_stp {
+	unsigned short sas_family;	/* Always AF_STP */
+	int sas_ifindex;		/* Interface index */
+	__be16 sas_port;		/* Port */
+	__u8 sas_addr[6];		/* MAC address */
+};
+
+/* STP protocol name; used as identifier in /proc/net/protocols */
+#define STP_PROTO_NAME "STP"
+
+/*
+ * STP uses proc interface to communicate statistical information to
+ * user space (in /proc/net/).
+ */
+#define STP_PROC_NET_FILENAME "stp_stats"
+#define STP_PROC_FULL_FILENAME "/proc/net/" STP_PROC_NET_FILENAME
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* STP_H_ */
diff --git a/tools/labs/templates/assignments/5-pitix/checker/Makefile b/tools/labs/templates/assignments/5-pitix/checker/Makefile
new file mode 100644
index 00000000000000..e0f7e2f32f7e07
--- /dev/null
+++ b/tools/labs/templates/assignments/5-pitix/checker/Makefile
@@ -0,0 +1,19 @@
+#
+# pitix filesystem - test Makefile
+#
+
+CFLAGS = -Wall -Wextra -g -m32
+LDFLAGS = -static -m32
+
+.PHONY: all clean
+
+all: mkfs.pitix
+	ln -sf _test/mkfs.pitix mkfs.pitix
+
+mkfs.pitix: _test/
+	$(MAKE) -C _test/
+
+clean:
+	-rm -f *~
+	-rm -f mkfs.pitix
+	$(MAKE) -C _test/ clean
diff --git a/tools/labs/templates/assignments/5-pitix/checker/README b/tools/labs/templates/assignments/5-pitix/checker/README
new file mode 100644
index 00000000000000..6c28010cac375f
--- /dev/null
+++ b/tools/labs/templates/assignments/5-pitix/checker/README
@@ -0,0 +1,46 @@
+== PITIX FS TEST SUITE ==
+
+Test suite for PITIX FS
+
+== FILES ==
+
+README
+	* this file
+
+Makefile.checker
+	* Makefile for automating the build process
+
+_checker
+	* script to run all tests
+
+_test/mkfs.pitix.c
+	* tool for formatting a device to PITIX FS
+
+pitix.loop.gz
+	* image to be mounted by tests
+
+pitix.files.tar.gz
+	* files to be verified by tests
+
+create_pitix_loop.sh
+	* script to create pitix.loop.gz from pitix.files.tar.gz
+	* needs fully functional pitix.ko
+	* should be run in QEMU/KVM virtual machine
+
+== BUILDING ==
+
+Use the Makefile to properly build the mkfs.pitix executable:
+
+	make -f Makefile.checker
+
+== RUNNING ==
+
+Copy your pitix.ko module and _checker, pitix.files.tar.gz, pitix.loop.gz and
+mkfs.pitix to fsimg/root directory on your QEMU/KVM virtual machine.
+
+In order to run the test suite you can use the _checker script.
+
+The _checker script runs all tests:
+
+	./_checker
+
diff --git a/tools/labs/templates/assignments/5-pitix/checker/_checker b/tools/labs/templates/assignments/5-pitix/checker/_checker
new file mode 100755
index 00000000000000..e717eedf7b73b8
--- /dev/null
+++ b/tools/labs/templates/assignments/5-pitix/checker/_checker
@@ -0,0 +1,383 @@
+#!/bin/sh
+
+mkfs_cmd=./mkfs.pitix
+
+inode_direct_data_blocks=5
+inode_size=32
+
+# counters maintained by check_true / check_false
+test_ok=0
+test_no=0
+
+#
+# Print "<start>....<end>", padded with dots to 72 columns, followed by an
+# optional " -- <extra>" line.
+#
+# $1 - start string
+# $2 - end string
+# $3 - extra string
+pretty_print()
+{
+	start_string="$1"
+	end_string="$2"
+	extra_string="$3"
+	dot_string=""
+
+	n_dots=$((72 - ${#start_string} - ${#end_string}))
+
+	for local_i in $(seq 0 $(($n_dots-1))); do
+		dot_string="${dot_string}."
+	done
+
+	echo "$start_string$dot_string$end_string"
+	if ! test -z "$extra_string"; then
+		echo -e " -- $extra_string"
+	fi
+}
+
+#
+# Run a command with its output discarded and report ok/failed.
+# Does not touch the test counters.
+#
+# $1 - string
+# $2 - command
+verbose_comm()
+{
+	start_string="$1"
+	shift
+
+	# eval, so callers can pass escaped redirections such as \>
+	eval $@ > /dev/null 2>&1; ret=$?;
+	if ! test "$ret" = "0"; then
+		end_string="failed"
+	else
+		end_string="ok"
+	fi
+
+	pretty_print "$start_string" "$end_string" ""
+}
+
+
+#
+# Numbered test that passes when the command exits with status 0.
+# Returns 0 on pass and 1 on failure; on failure the command is echoed.
+#
+# $1 - string
+# $2 - command
+check_true()
+{
+	test_no=$(($test_no+1))
+	start_string="test $test_no: $1"
+	shift
+
+	eval $@ > /dev/null 2>&1; ret=$?;
+	if ! test "$ret" = "0"; then
+		end_string="failed"
+		extra_string="$@"
+		ret=1
+	else
+		test_ok=$(($test_ok+1))
+		extra_string=""
+		end_string="ok"
+		ret=0
+	fi
+
+	pretty_print "$start_string" "$end_string" "$extra_string"
+
+	return $ret
+}
+
+#
+# Numbered test that passes when the command exits with a non-zero status.
+#
+# $1 - string
+# $2 - command
+check_false()
+{
+	test_no=$(($test_no+1))
+	start_string="test $test_no: $1"
+	shift
+
+	eval $@ > /dev/null 2>&1; ret=$?;
+	if test "$ret" = "0"; then
+		end_string="failed"
+		extra_string="$@"
+	else
+		test_ok=$(($test_ok+1))
+		end_string="ok"
+		extra_string=""
+	fi
+
+	pretty_print "$start_string" "$end_string" "$extra_string"
+}
+
+
+# Read the attributes of file $1 with stat(1) into the globals
+# rights, uid, gid, size and type (type is a test(1) operator: -f or -d).
+get_file_attrs()
+{
+	rights=0$(stat -c %a $1)
+	uid=$(stat -c %u $1)
+	gid=$(stat -c %g $1)
+	size=$(stat -c %s $1)
+	case "$(stat -c %F $1)" in
+		"regular file") type="-f" ;;
+		"regular empty file") type="-f" ;;
+"directory") type="-d" ;;
+		*) type="-bad" ;;
+	esac
+}
+
+# Compare a file on the mounted filesystem (/tmp/pitix.mnt) with the
+# expected attributes and with the reference copy under $7.
+#
+# $1 file
+# $2 size
+# $3 rights
+# $4 uid
+# $5 gid
+# $6 type
+# $7 path to gold file
+check_file()
+{
+	get_file_attrs /tmp/pitix.mnt/$1
+
+	check_true "check $1" test $6 /tmp/pitix.mnt/$1
+	check_true "check $1 size" test "$size" = "$2"
+	check_true "check $1 rights" let \"$rights == $3\"
+	check_true "check $1 uid" let \"$uid == $4\"
+	check_true "check $1 gid" let \"$gid == $5\"
+	check_true "check $1 data" diff /tmp/pitix.mnt/$1 $7/$1
+}
+
+# Read filesystem statistics of $1 with stat -f into the globals
+# cfblocks (%f), cfinodes (%d), cblocks (%b) and cinodes (%c).
+get_statfs()
+{
+	cfblocks=$(stat -f -c %f $1)
+	cfinodes=$(stat -f -c %d $1)
+	cblocks=$(stat -f -c %b $1)
+	cinodes=$(stat -f -c %c $1)
+}
+
+# Check that the free block/inode counts reported by the mounted filesystem
+# match the expected values tracked in fblocks and finodes.
+check_statfs()
+{
+	get_statfs /tmp/pitix.mnt/
+	check_true "check free blocks" test "$cfblocks" = "$fblocks" || echo "$cfblocks" = "$fblocks"
+	check_true "check free inodes" test "$cfinodes" = "$finodes" || echo "$cfinodes" = "$finodes"
+}
+
+
+# Create a file of random data in the reference tree, copy it to the mounted
+# filesystem and update the expected free inode/block counts.
+#
+# $1 file
+# $2 size
+# $3 rights
+# $4 uid
+# $5 gid
+do_create()
+{
+	verbose_comm "generating random data" dd if=/dev/urandom bs=1 count=$2 \> /tmp/pitix.rw/$1
+	chmod $3 /tmp/pitix.rw/$1
+	chown $4.$5 /tmp/pitix.rw/$1
+	check_true "copy file to /tmp/pitix.mnt/$1" \
+		cp -p /tmp/pitix.rw/$1 /tmp/pitix.mnt/$1
+
+	# $? is the check_true verdict for the copy above
+	if [ $? -eq 0 ]; then
+		finodes=$(($finodes-1))
+		# size rounded up to whole blocks
+		current_file_blocks=$((($2+$block_size-1)/$block_size))
+		# one extra block beyond the direct-block limit
+		# (presumably the indirect block - confirm against the fs layout)
+		if [ $current_file_blocks -gt $inode_direct_data_blocks ]; then
+			current_file_blocks=$(($current_file_blocks+1))
+		fi
+		fblocks=$(($fblocks-$current_file_blocks))
+	fi
+
+	check_file $1 $2 $3 $4 $5 -f /tmp/pitix.rw/
+	check_statfs
+}
+
+# Create a directory in both trees; a successful mkdir on the mounted
+# filesystem is accounted as one inode and one block.
+#
+# $1 dir
+# $2 rights
+# $3 uid
+# $4 gid
+do_mkdir()
+{
+	mkdir /tmp/pitix.rw/$1
+	chmod $2 /tmp/pitix.rw/$1
+	chown $3.$4 /tmp/pitix.rw/$1
+
+	check_true "check mkdir" mkdir /tmp/pitix.mnt/$1 && \
+		finodes=$(($finodes-1)) && \
+		fblocks=$(($fblocks-1))
+
+	check_true "check chmod" chmod $2 /tmp/pitix.mnt/$1
+	check_true "check chown" chown $3.$4 /tmp/pitix.mnt/$1
+
+	check_file $1 $block_size $2 $3 $4 -d /tmp/pitix.rw/
+	check_statfs
+}
+
+# Remove a file from both trees and give its inode and blocks back to the
+# expected free counts.
+#
+# $1 file
+do_unlink()
+{
+	get_file_attrs /tmp/pitix.mnt/$1
+
+	rm -f /tmp/pitix.rw/$1
+	check_true "check unlink 1" rm /tmp/pitix.mnt/$1
+	check_true "check unlink 2" ! test -e /tmp/pitix.mnt/$1
+
+	# $? reflects only "check unlink 2" (the file is gone)
+	if [ $? -eq 0 ]; then
+		finodes=$(($finodes+1))
+		current_file_blocks=$((($size+$block_size-1)/$block_size))
+		if [ $current_file_blocks -gt $inode_direct_data_blocks ]; then
+			current_file_blocks=$(($current_file_blocks+1))
+		fi
+		fblocks=$(($fblocks+$current_file_blocks))
+	fi
+
+	check_statfs
+}
+
+# Removing this file is expected to fail.
+#
+# $1 file
+do_false_unlink()
+{
+	check_false "check false unlink " rm /tmp/pitix.mnt/$1
+}
+
+
+# Remove a directory from both trees; one inode and one block are given
+# back to the expected free counts unconditionally.
+#
+# $1 file
+do_rmdir()
+{
+	rmdir /tmp/pitix.rw/$1
+	check_true "check rmdir 1" rmdir /tmp/pitix.mnt/$1
+	check_true "check rmdir 2" ! test -e /tmp/pitix.mnt/$1
+	finodes=$(($finodes+1))
+	fblocks=$(($fblocks+1))
+	check_statfs
+}
+
+# Removing this directory is expected to fail.
+#
+# $1 file
+do_false_rmdir()
+{
+	check_false "check false rmdir" rmdir /tmp/pitix.mnt/$1
+}
+
+
+# Overwrite a file with the 6-byte string "dummy\n" in both trees; the
+# expected free block count gains the old blocks minus the one still in use.
+#
+# $1 file
+do_truncate()
+{
+	get_file_attrs /tmp/pitix.mnt/$1
+	old_blocks=$((($size+$block_size-1)/$block_size))
+
+	if [ $old_blocks -gt $inode_direct_data_blocks ]; then
+		old_blocks=$(($old_blocks+1))
+	fi
+
+	echo dummy > /tmp/pitix.rw/$1
+	check_true "check truncate" echo dummy \> /tmp/pitix.mnt/$1 && \
+		fblocks=$(($fblocks+$old_blocks-1))
+
+	check_file $1 6 $rights $uid $gid -f /tmp/pitix.rw/
+	check_statfs
+}
+
+
+# Read-write scenario: format a fresh image with the given block size, mount
+# it, then create, truncate and remove files and directories while checking
+# contents and free block/inode counts after every step.
+#
+# $1 block size
+test_rw()
+{
+	block_size=$1
+	# expected geometry, derived from the block size
+	de_per_dir=$(($block_size/20))
+	blocks=$((8*$block_size))
+	inodes=$((32*$block_size/$inode_size))
+	# one block and one inode are expected to be in use after mkfs
+	fblocks=$(($blocks-1))
+	finodes=$(($inodes-1))
+
+	rm -Rf /tmp/pitix.loop /tmp/pitix.rw /tmp/pitix.mnt > /dev/null 2>&1; mkdir /tmp/pitix.rw; mkdir /tmp/pitix.mnt
+
+	# images that are not a pitix filesystem must be refused by mount
+	dd if=/dev/zero bs=1 count=4096 > /tmp/pitix.loop
+	check_false "mounting bad fs 1" mount -t pitix /tmp/pitix.loop /tmp/pitix.mnt/ -o loop
+	echo > /tmp/pitix.loop
+	check_false "mounting bad fs 2" mount -t pitix /tmp/pitix.loop /tmp/pitix.mnt/ -o loop
+
+	verbose_comm "formating fs" "$mkfs_cmd" $1 /tmp/pitix.loop
+	check_true "mounting fs" mount -t pitix /tmp/pitix.loop /tmp/pitix.mnt/ -o loop
+	if [ $? -ne 0 ]; then
+		exit 1
+	fi
+
+	get_statfs /tmp/pitix.mnt
+	check_true "check total blocks" test "$cblocks" = "$blocks" || echo "$cblocks" = "$blocks"
+	check_true "check total inodes" test "$cinodes" = "$inodes" || echo "$cinodes" = "$inodes"
+
+	# fill one directory with de_per_dir files of growing size
+	do_mkdir lots_of_files 0777 0 0
+	for i in $(seq 0 $(($de_per_dir-1))); do
+		do_create lots_of_files/file$i $(($i*1024)) 0$(($i%8))$((($i/2)%8))$((($i/4)%8)) $(($i%100)) $(($i*32%100))
+	done
+
+	# one more entry must not fit
+	check_false "dir overflow" touch /tmp/pitix.mnt/lots_of_files/the_drop
+
+	check_statfs
+
+	# remount and check that everything was persisted
+	check_true "umounting fs" umount /tmp/pitix.mnt/
+	check_true "mounting fs" mount -t pitix /tmp/pitix.loop /tmp/pitix.mnt/ -o loop
+	if [ $? -ne 0 ]; then
+		exit 1
+	fi
+
+	check_statfs
+
+	# even-numbered files are truncated, odd-numbered ones are removed
+	for i in $(seq 0 $(($de_per_dir-1))); do
+		check_file lots_of_files/file$i $(($i*1024)) 0$(($i%8))$((($i/2)%8))$((($i/4)%8)) $(($i%100)) $(($i*32%100)) -f /tmp/pitix.rw/
+		if let "$(($i%2)) == 0"; then
+			do_truncate lots_of_files/file$i
+		else
+			do_unlink lots_of_files/file$i
+			do_false_unlink lots_of_files/file$i
+		fi
+	done
+
+	for i in $(seq 1 $(($de_per_dir-1))); do
+		do_mkdir dir$i 0666 0 0
+		do_create dir$i/dummy 4096 0666 0 0
+	done
+
+	# rmdir must fail while the directory still holds a file
+	for i in $(seq 1 $(($de_per_dir-1))); do
+		do_false_rmdir dir$i
+		do_unlink dir$i/dummy
+		do_rmdir dir$i
+	done
+
+
+	# the module must not unload while the filesystem is mounted
+	check_false "module unloading" rmmod pitix
+	check_true "umounting fs" umount /tmp/pitix.mnt/
+}
+
+test_ro()
+{
+	rm -Rf /tmp/pitix.loop /tmp/pitix.mnt /tmp/pitix.ro > /dev/null 2>&1; mkdir /tmp/pitix.ro; mkdir /tmp/pitix.mnt
+	cp pitix.loop.gz /tmp/pitix.loop.gz
+	gunzip /tmp/pitix.loop.gz
+	tar xzf pitix.files.tar.gz -C /tmp/pitix.ro
+
+	check_true "mounting fs" mount -t pitix /tmp/pitix.loop /tmp/pitix.mnt/ -o loop
+	if [ $?
-ne 0 ]; then + exit 1 + fi + + for file in $(find /tmp/pitix.ro -type f | cut -f4- -d/); do + get_file_attrs /tmp/pitix.ro/$file + check_file $file $size $rights $uid $gid $type /tmp/pitix.ro/ + done + + check_false "module unloading" rmmod pitix + check_true "umounting fs" umount /tmp/pitix.mnt/ +} + +cleanup_world() +{ + umount /tmp/pitix.mnt > /dev/null 2>&1 + rmmod pitix > /dev/null 2>&1 + rm /tmp/pitix.loop > /dev/null 2>&1 + rm pitix.loop > /dev/null 2>&1 + rm -r /tmp/pitix.ro > /dev/null 2>&1 + rm -r /tmp/pitix.rw > /dev/null 2>&1 + rm -r /tmp/pitix.mnt > /dev/null 2>&1 +} + +cleanup_world + +mkdir -p /tmp + +check_true "module loading" insmod pitix.ko +check_true "test /proc/filesystems" grep pitix /proc/filesystems +test_ro +check_true "module unloading" rmmod pitix + +check_true "module loading" insmod pitix.ko +check_true "test /proc/filesystems" grep pitix /proc/filesystems +test_rw 512 +check_true "module unloading" rmmod pitix + +cleanup_world + +echo "Tests ok: $test_ok/$test_no" # could also display as percent + +# vim: set tabstop=4 shiftwidth=4: diff --git a/tools/labs/templates/assignments/5-pitix/checker/_test/.gitignore b/tools/labs/templates/assignments/5-pitix/checker/_test/.gitignore new file mode 100644 index 00000000000000..3a9802ba54214a --- /dev/null +++ b/tools/labs/templates/assignments/5-pitix/checker/_test/.gitignore @@ -0,0 +1 @@ +/mkfs.pitix diff --git a/tools/labs/templates/assignments/5-pitix/checker/_test/Makefile b/tools/labs/templates/assignments/5-pitix/checker/_test/Makefile new file mode 100644 index 00000000000000..f2f8374f1260b1 --- /dev/null +++ b/tools/labs/templates/assignments/5-pitix/checker/_test/Makefile @@ -0,0 +1,13 @@ +CFLAGS = -Wall -Wextra -g -m32 +LDFLAGS = -static -m32 + +.PHONY: all clean + +all: mkfs.pitix + +mkfs.pitix: mkfs.pitix.o + +mkfs.pitix.o: mkfs.pitix.c pitix.h + +clean: + -rm -f *~ mkfs.pitix.o mkfs.pitix diff --git a/tools/labs/templates/assignments/5-pitix/checker/_test/mkfs.pitix.c 
b/tools/labs/templates/assignments/5-pitix/checker/_test/mkfs.pitix.c new file mode 100644 index 00000000000000..75f9e3260e76ac --- /dev/null +++ b/tools/labs/templates/assignments/5-pitix/checker/_test/mkfs.pitix.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + +#include "pitix.h" + +/* + * mkfs.pitix block_size file + */ + +int main(int argc, char **argv) +{ + FILE *file; + char buffer[4096]; + int block_size, bits, i; + struct pitix_super_block psb; + struct pitix_inode root_inode; + + if (argc != 3) + return -1; + + block_size = atoi(argv[1]); + + switch (block_size) { + case 512: + bits = 9; + break; + case 1024: + bits = 10; + break; + case 2048: + bits = 11; + break; + case 4096: + bits = 12; + break; + default: + return -1; + } + + file = fopen(argv[2], "w+"); + if (!file) + return -1; + + memset(&psb, 0, sizeof(struct pitix_super_block)); + + psb.magic = PITIX_MAGIC; + psb.version = PITIX_VERSION; + psb.block_size_bits = bits; + psb.imap_block = PITIX_SUPERBLOCK_SIZE / block_size; + psb.dmap_block = psb.imap_block + 1; + psb.izone_block = psb.dmap_block + 1; + psb.dzone_block = psb.izone_block + IZONE_BLOCKS; + psb.bfree = 8 * block_size; + psb.ffree = IZONE_BLOCKS * block_size / inode_size(); + + printf("mkfs.pitix block_size=%d\n", block_size); + + /* zero disk */ + memset(buffer, 0, block_size); + for (i = 0; i < psb.bfree + IZONE_BLOCKS + 1 + 1 + psb.imap_block; i++) + fwrite(buffer, block_size, 1, file); + + fseek(file, 0, SEEK_SET); + + /* alloc the 1st block and inode to the roor dir */ + psb.bfree--; psb.ffree--; + /* initialize super block */ + fwrite(&psb, sizeof(psb), 1, file); + + fseek(file, PITIX_SUPERBLOCK_SIZE, SEEK_SET); + memset(buffer, 0, block_size); + buffer[0] = 0x01; + /* alloc inode 0 */ + fwrite(buffer, block_size, 1, file); + /* alloc block 0 */ + fwrite(buffer, block_size, 1, file); + + /* initialize root inode */ + memset(&root_inode, 0, sizeof(root_inode)); + root_inode.mode = S_IFDIR; + root_inode.size = 
block_size; + fseek(file, psb.izone_block * block_size, SEEK_SET); + fwrite(&root_inode, sizeof(root_inode), 1, file); + + return 0; +} diff --git a/tools/labs/templates/assignments/5-pitix/checker/_test/pitix.h b/tools/labs/templates/assignments/5-pitix/checker/_test/pitix.h new file mode 100644 index 00000000000000..2cca5839aac3a1 --- /dev/null +++ b/tools/labs/templates/assignments/5-pitix/checker/_test/pitix.h @@ -0,0 +1,138 @@ +#ifndef PITIX_H_ +#define PITIX_H_ + +#include + +#define PITIX_MAGIC 0x58495450 /* ascii little endian for PTIX */ +#define PITIX_VERSION 2 +#define PITIX_SUPERBLOCK_SIZE 4096 +#define PITIX_NAME_LEN 16 +#define IZONE_BLOCKS 32 +#define INODE_DIRECT_DATA_BLOCKS 5 + +/* + * filesystem layout: + * + * SB IMAP DMAP IZONE DATA + * ^ ^ (1 block) (1 block) (32 blocks) + * | | + * +-0 +-- 4096 + */ + +/* PITIX super block on disk + * could be reused for in-memory super block + */ +struct pitix_super_block { + unsigned long magic; + __u8 version; + __u8 block_size_bits; + __u8 imap_block; + __u8 dmap_block; + __u8 izone_block; + __u8 dzone_block; + __u16 bfree; + __u16 ffree; +#ifdef __KERNEL__ + struct buffer_head *sb_bh, *dmap_bh, *imap_bh; + __u8 *dmap, *imap; +#endif +}; + +/* PITIX dir entry on disk */ +struct __attribute__((__packed__)) pitix_dir_entry { + __u32 ino; + char name[PITIX_NAME_LEN]; +}; + +/* PITIX inode on disk */ +struct __attribute__((__packed__)) pitix_inode { + __u32 mode; + uid_t uid; + gid_t gid; + __u32 size; + __u32 time; + __u16 direct_data_blocks[INODE_DIRECT_DATA_BLOCKS]; + __u16 indirect_data_block; +}; + +/* returns size of PITIX inode on disk */ +static inline int inode_size(void) +{ + return sizeof(struct pitix_inode); +} + +/* returns size of PITIX dir entry on disk */ +static inline int dir_entry_size(void) +{ + return sizeof(struct pitix_dir_entry); +} + +#ifdef __KERNEL__ + +/* returns number of PITIX inodes on disk */ +static inline long get_inodes(struct super_block *sb) +{ + return IZONE_BLOCKS * 
sb->s_blocksize / inode_size(); +} + +/* returns number of PITIX dir entries per block */ +static inline int dir_entries_per_block(struct super_block *sb) +{ + return sb->s_blocksize / dir_entry_size(); +} + +/* returns number of data blocks on disk */ +static inline long get_blocks(struct super_block *sb) +{ + return 8 * sb->s_blocksize; +} + +/* file system ops */ +extern struct file_system_type pitix_fs_type; +extern struct dentry *pitix_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data); + +/* super block ops */ +extern const struct super_operations pitix_sops; +extern struct inode *pitix_alloc_inode(struct super_block *sb); +extern void pitix_destroy_inode(struct inode *inode); +extern int pitix_write_inode(struct inode *inode, + struct writeback_control *wbc); +extern void pitix_evict_inode(struct inode *inode); +extern int pitix_statfs(struct dentry *dentry, struct kstatfs *buf); +extern void pitix_put_super(struct super_block *sb); + +/* file ops */ +extern const struct file_operations pitix_file_operations; + +/* file inode ops */ +extern const struct inode_operations pitix_file_inode_operations; +extern int pitix_setattr(struct dentry *dentry, struct iattr *attr); + +/* dir ops */ +extern const struct file_operations pitix_dir_operations; +extern int pitix_readdir(struct file *filp, struct dir_context *ctx); + +/* dir inode ops */ +extern const struct inode_operations pitix_dir_inode_operations; +extern struct dentry *pitix_lookup(struct inode *dir, + struct dentry *dentry, unsigned int flags); +extern int pitix_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl); +extern int pitix_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); +extern int pitix_unlink(struct inode *dir, struct dentry *dentry); +extern int pitix_rmdir(struct inode *dir, struct dentry *dentry); + +/* addr space ops */ +extern const struct address_space_operations pitix_aops; +extern int pitix_readpage(struct file 
*file, struct page *page); +extern int pitix_writepage(struct page *page, struct writeback_control *wbc); +extern int pitix_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int flags, + struct page **pagep, void **fsdata); +extern sector_t pitix_bmap(struct address_space *mapping, sector_t block); + +#endif + +#endif + diff --git a/tools/labs/templates/assignments/5-pitix/checker/create_pitix_loop.sh b/tools/labs/templates/assignments/5-pitix/checker/create_pitix_loop.sh new file mode 100755 index 00000000000000..d5fc29a21add9c --- /dev/null +++ b/tools/labs/templates/assignments/5-pitix/checker/create_pitix_loop.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +set -e +set -x + +if [ $# -ne 1 ]; then + echo "usage: $0 " + exit 1 +fi + +block_size=$1 + +mkdir -p /tmp/pitix.ro +mkdir -p /tmp/pitix.mnt + +tar -xzf pitix.files.tar.gz -C /tmp/pitix.ro +./mkfs.pitix $block_size /tmp/pitix.loop + +insmod pitix.ko +mount -t pitix /tmp/pitix.loop /tmp/pitix.mnt -o loop + +cp -pr /tmp/pitix.ro/* /tmp/pitix.mnt/ +ls -lR /tmp/pitix.mnt + +umount /tmp/pitix.mnt +rmmod pitix + +gzip /tmp/pitix.loop +mv /tmp/pitix.loop.gz . 
+ +rm -rf /tmp/pitix.ro +rm -rf /tmp/pitix.mnt + diff --git a/tools/labs/templates/assignments/5-pitix/checker/pitix.files.tar.gz b/tools/labs/templates/assignments/5-pitix/checker/pitix.files.tar.gz new file mode 100644 index 00000000000000..eff75e041b331d Binary files /dev/null and b/tools/labs/templates/assignments/5-pitix/checker/pitix.files.tar.gz differ diff --git a/tools/labs/templates/assignments/5-pitix/checker/pitix.loop.gz b/tools/labs/templates/assignments/5-pitix/checker/pitix.loop.gz new file mode 100644 index 00000000000000..8c282d6173054c Binary files /dev/null and b/tools/labs/templates/assignments/5-pitix/checker/pitix.loop.gz differ diff --git a/tools/labs/templates/assignments/5-pitix/pitix.h b/tools/labs/templates/assignments/5-pitix/pitix.h new file mode 100644 index 00000000000000..214846cf9f1103 --- /dev/null +++ b/tools/labs/templates/assignments/5-pitix/pitix.h @@ -0,0 +1,121 @@ +#ifndef _PITIX_H +#define _PITIX_H + +#define PITIX_MAGIC 0x58495450 /* ascii little endian for PTIX */ +#define IZONE_BLOCKS 32 +#define INODE_DIRECT_DATA_BLOCKS 5 +#define PITIX_NAME_LEN 16 + +/* + * filesystem layout: + * + * SB IMAP DMAP IZONE DATA + * ^ ^ (1 block) (1 block) (32 blocks) + * | | + * +-0 +-- 4096 + */ + +struct pitix_super_block { + unsigned long magic; + __u8 version; + __u8 block_size_bits; + __u8 imap_block; + __u8 dmap_block; + __u8 izone_block; + __u8 dzone_block; + __u16 bfree; + __u16 ffree; +#ifdef __KERNEL__ + struct buffer_head *sb_bh, *dmap_bh, *imap_bh; + __u8 *dmap, *imap; +#endif +}; + +struct pitix_dir_entry { + __u32 ino; + char name[PITIX_NAME_LEN]; +}; + +struct pitix_inode { + __u32 mode; + uid_t uid; + gid_t gid; + __u32 size; + __u32 time; + __u16 direct_data_blocks[INODE_DIRECT_DATA_BLOCKS]; + __u16 indirect_data_block; +}; + +#ifdef __KERNEL__ +static inline int inode_size(void) +{ + return sizeof(struct pitix_inode); +} + +static inline int dir_entry_size(void) +{ + return sizeof(struct pitix_dir_entry); +} + +static 
inline int dir_entries_per_block(struct super_block *sb) +{ + return sb->s_blocksize/dir_entry_size(); +} + +static inline long get_blocks(struct super_block *sb) +{ + return 8*sb->s_blocksize; +} + +static inline long get_inodes(struct super_block *sb) +{ + return IZONE_BLOCKS*sb->s_blocksize/inode_size(); +} + +static inline long pitix_inodes_per_block(struct super_block *sb) +{ + return sb->s_blocksize / inode_size(); +} + +/* Bitmap operations */ +extern int pitix_alloc_block(struct super_block *sb); +extern void pitix_free_block(struct super_block *sb, int block); +extern int pitix_alloc_inode(struct super_block *sb); +extern void pitix_free_inode(struct super_block *sb, int ino); +extern int pitix_get_block(struct inode *inode, sector_t block, + struct buffer_head *bh_result, int create); +extern struct address_space_operations pitix_aops; + +/* Dir operations */ +extern struct inode_operations pitix_dir_inode_operations; +extern struct file_operations pitix_dir_operations; +ino_t pitix_inode_by_name(struct dentry *dentry, int delete); + +/* File operations */ +extern struct file_operations pitix_file_operations; +extern struct inode_operations pitix_file_inode_operations; +void pitix_truncate(struct inode *inode); + +/* Inode operations */ +extern struct inode *pitix_new_inode(struct super_block *sb); +extern int pitix_write_inode(struct inode *inode, struct writeback_control *wbc); +extern void pitix_evict_inode(struct inode *inode); + +extern struct inode *pitix_iget(struct super_block *sb, unsigned long ino); + +/* Super operations */ +extern int pitix_fill_super(struct super_block *sb, void *data, int silent); +extern struct super_operations pitix_sops; +#endif + +static inline struct pitix_super_block *pitix_sb(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct pitix_inode *pitix_i(struct inode *inode) +{ + return inode->i_private; +} + +#endif diff --git a/tools/labs/templates/assignments/6-e100/Kbuild 
b/tools/labs/templates/assignments/6-e100/Kbuild new file mode 100644 index 00000000000000..c8ed5a306348eb --- /dev/null +++ b/tools/labs/templates/assignments/6-e100/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = e100-ix.o diff --git a/tools/labs/templates/assignments/6-e100/_checker b/tools/labs/templates/assignments/6-e100/_checker new file mode 100755 index 00000000000000..0a00a7bd3f46d2 --- /dev/null +++ b/tools/labs/templates/assignments/6-e100/_checker @@ -0,0 +1,59 @@ +#!/bin/sh + +DEV=ixeth0 +ADDR=172.30.0.2 +REM_ADDR=172.30.0.1 +NTTCP=./nttcp +MODULE=./e100-ix.ko + +set -e + +# Uncomment this to show what commands are run. +#set -x + +dmesg -c > /dev/null 2>&1 + +echo +echo "== 01. insmod, rmmod ==" +insmod $MODULE +dmesg -c +rmmod $MODULE +dmesg -c + +echo +echo "== 02. link up, set address ==" +insmod $MODULE +dmesg -c +ip link set up dev $DEV +ip address add $ADDR/24 dev $DEV + +echo +echo "== 03. ping ==" +ping -c1 $REM_ADDR + +echo +echo "== 04. nttcp transmit ==" +$NTTCP -T -t -c $REM_ADDR + +echo +echo "== 05. nttcp receive ==" +$NTTCP -T -r -c $REM_ADDR + +echo +echo "== 06. rmmod, reinsert ==" +dmesg -c +rmmod $MODULE +dmesg -c +insmod $MODULE +dmesg -c + +echo +echo "== 07. one last test ==" +ip link set up dev $DEV +ip address add $ADDR/24 dev $DEV +ping -c1 $REM_ADDR +$NTTCP -T -t -c $REM_ADDR +rmmod $MODULE + +echo +echo "Success!" 
diff --git a/tools/labs/templates/assignments/6-e100/e100-ix.c b/tools/labs/templates/assignments/6-e100/e100-ix.c new file mode 100644 index 00000000000000..587e8362495174 --- /dev/null +++ b/tools/labs/templates/assignments/6-e100/e100-ix.c @@ -0,0 +1,193 @@ +/* + * e100-ix.c, driver implementation + * + * TODO 0: FirstName LastName + */ + +#include +#include + +#include "e100-ix.h" + +/* + * e100 private data + * + * @pdev - PCI device + * @netdev - network device + */ +struct e100_priv_data { + struct pci_dev *pdev; + struct net_device *netdev; + /* TODO 3: device control and configuration + * e.g: + * - CSR register address + */ +}; + +static irqreturn_t e100_intr(int irq, void *private_data) +{ + struct e100_priv_data *data; + + data = (struct e100_priv_data *)private_data; + + /* TODO 6: read STAT/ACK byte from CSR */ + + /* TODO 6: return IRQ_NONE if interrupt is not for this device */ + + /* TODO 6: handle Frame Reception interrupt */ + /* + * while receiving frames + * allocate skb + * copy data from Receive Frame Descriptor to skb + * free current RFD + * resume receive unit + * push skb up to network stack using netif_rx + */ + + /* ACK all interrupts */ + + return IRQ_HANDLED; +} + +static int e100_ndo_open(struct net_device *netdev) +{ + struct e100_priv_data *data; + + data = netdev_priv(netdev); + + /* TODO 5: Create TX ring buffer to store CB_RING_LEN Command Blocks */ + + /* TODO 5: first command to ring buffer to set MAC */ + + /* TODO 6: Create RX ring buffer to store RFD_RING_LEN */ + + /* TODO 6: register interrupt handler */ + + /* TODO 6: enable interrupts */ + + /* TODO 5: start command unit */ + + /* TODO 6: start receive unit */ + + /* TODO 5: allow transmit by calling netif_start_queue */ + + return 0; +} + +static int e100_ndo_stop(struct net_device *netdev) +{ + struct e100_priv_data *data; + + data = netdev_priv(netdev); + + /* TODO 5: stop transmit by calling netif_stop_queue */ + + /* TODO 6: disable network interrupts and free irq 
*/ + + /* TODO 5: deallocate TX ring */ + /* TODO 6: deallocate RX ring */ + + return 0; +} + +/* + * e100_ndo_start_xmit - transmit skb over netdev + * + */ +static netdev_tx_t e100_ndo_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct e100_priv_data *data; + + data = netdev_priv(netdev); + + /* TODO 5: reclaim all buffers which were transmitted */ + /* TODO 5: create new transmit command for current skb */ + + /* TODO 5: resume command unit */ + return NETDEV_TX_OK; +} + +struct net_device_ops e100_netdev_ops = { + .ndo_open = e100_ndo_open, + .ndo_stop = e100_ndo_stop, + .ndo_start_xmit = e100_ndo_start_xmit +}; + +static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + /* TODO 4: allocate netdevice, may use alloc_etherdev + * + * .. set proper name, irq, netdev_ops + * .. set mac address (may use eth_hw_addr_random) + */ + + /* TODO 4: get netdevice private data using netdev_priv */ + + /* TODO 2: hide e100_priv_data into pdev using dev_set_drvdata */ + + + /* TODO 2: initialize PCI device: use pci_enable_device */ + + /* TODO 2: reserve PCI I/O and memory resources: use pci_request_regions */ + + /* TODO 2: we will use BAR 1, use pci_resource_flags to check for BAR 1*/ + + /* TODO 2: Check if device supports 32-bit DMA, use pci_set_dma_mask */ + + /* TODO 2: map Control Status Register into our address space, use pci_iomap */ + + /* TODO 2: enable DMA by calling pci_set master */ + + /* TODO 4: register netdevice with the networking subsystem */ + + return 0; +} + +static void e100_remove(struct pci_dev *pdev) +{ + struct e100_priv_data *data; + + /* TODO 2: restore e100_priv_data from pdev using dev_get_drvdata */ + + /* TODO 4: unregister netdevice from the networking subsystem */ + + /* TODO 2: PCI cleanup + * * unmap CSR + * * release PCI regions + * * disable pci device + */ + + /* TODO 4: free netdevice */ +} + +static const struct pci_device_id e100_pci_driver_ids[] = { + { + /* TODO 1: fill in .vendor and 
.device */ + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID + }, + { }, +}; +MODULE_DEVICE_TABLE(pci, e100_pci_driver_ids); + + +/* TODO 1: initialize struct pci_driver */ +static struct pci_driver e100_pci_driver = { +}; + +static int e100_init(void) +{ + /* TODO 1: register PCI driver */ +} + +static void e100_exit(void) +{ + /* TODO 1: unregister PCI driver */ +} + +module_init(e100_init); +module_exit(e100_exit); + +MODULE_DESCRIPTION("e100 network driver"); +MODULE_AUTHOR("TODO 0: FirstName LastName "); +MODULE_LICENSE("GPL"); diff --git a/tools/labs/templates/assignments/6-e100/e100-ix.h b/tools/labs/templates/assignments/6-e100/e100-ix.h new file mode 100644 index 00000000000000..8a2b48866d25f7 --- /dev/null +++ b/tools/labs/templates/assignments/6-e100/e100-ix.h @@ -0,0 +1,149 @@ +#ifndef __E100_IX__ +#define __E100_IX__ + +#define DRIVER_NAME "e100-ix" + +/* misc useful bits, use this or define your own bits */ +#define CB_SET_INDIVIDUAL_ADDR 0x01 +#define CB_TRANSMIT 0x04 + +#define CU_START 0x10 +#define CU_RESUME 0x20 +#define RU_START 0x01 +#define RU_RESUME 0x02 +#define RU_SUSPENDED 0x04 +#define CU_SUSPENDED 0x40 + +#define SOFTWARE_RESET 0x00000000 + +#define ENABLE_INTERRUPTS 0x00 +#define DISABLE_INTERRUPTS 0x01 + +#define MAC_ADDRESS_LEN 6 +#define DATA_LEN 1518 + +#define CSR_COMMAND 0x02 +#define CSR_INT_CONTROL 0x03 + +#define CB_RING_LEN 64 +#define RFD_RING_LEN 64 + +#define E100_VENDOR 0x8086 +#define E100_DEVICE 0x1209 + +/* struct csr - Control/Status Register */ +struct csr { + /* System-Control Block */ + struct { + u8 status; + u8 stat_ack; + u8 cmd_lo; + u8 cmd_hi; + u32 gen_ptr; + } scb; + + /* Device Reset */ + u32 port; + u16 flash_ctrl; + u8 eeprom_ctrl_lo; + u8 eeprom_ctrl_hi; + u32 mdi_ctrl; + u32 rx_dma_count; +}; + +/* struct tcb - Transmit Command Block */ +struct tcb { + u32 tbd_array; + u16 tcb_byte_count; + u8 threshold; + u8 tbd_number; + + /* Transmit Buffer Descriptor */ + struct { + __le32 buf_addr; + __le16 size; 
+ u16 unused; + } tbd; +}; + +/* struct cb - Command Block */ +struct cb { + struct cb_status { + u16 unused1:12; + u8 u:1; + u8 ok:1; + u8 unused2:1; + u8 c:1; + } status; + + struct cb_command { + u16 cmd:3; + u8 sf:1; + u8 nc:1; + u8 zero:3; + u8 cid:5; + u8 i:1; + u8 suspend:1; + u8 el:1; + } command; + + u32 link; + + union { + /* Transmit Command Block */ + struct tcb tcb; + + /* Individual Address Setup */ + u8 ias[8]; + } u; + + struct cb *prev, *next; /* for CBL ring buffer */ + dma_addr_t dma_addr; + struct sk_buff *skb; /* when CB is of Transmit Command Block type */ +}; + +/* struct rfd - Receive Frame Descriptor */ +struct rfd { + struct rfd_status { + u16 status:13; + u8 ok:1; + u8 zero:1; + u8 c:1; + } status; + + struct rfd_command { + u16 zero1:3; + u8 sf:1; + u8 h:1; + u16 zero2:9; + u8 suspend:1; + u8 el:1; + } command; + + u32 link; + + u32 reserved; + + u16 actual_count:14; + u8 f:1; + u8 eol:1; + u16 size; + + char data[DATA_LEN]; + + struct rfd *prev, *next; + dma_addr_t dma_addr; +}; + +struct csr_stat_ack { + u8 fcp:1; + u8 res:1; + u8 swi:1; + u8 mdi:1; + u8 rnr:1; + u8 cna:1; + u8 frame_reception:1; + u8 cx:1; +}; + +#endif /* __E100_IX__ */ diff --git a/tools/labs/templates/assignments/6-e100/nttcp b/tools/labs/templates/assignments/6-e100/nttcp new file mode 100755 index 00000000000000..9cf0c97c5c2d19 Binary files /dev/null and b/tools/labs/templates/assignments/6-e100/nttcp differ diff --git a/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/kernel/Kbuild b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/kernel/Kbuild new file mode 100644 index 00000000000000..4f1b616bf92028 --- /dev/null +++ b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/kernel/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g -Wno-unused + +obj-m = ram-disk.o diff --git a/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/kernel/ram-disk.c b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/kernel/ram-disk.c new file 
mode 100644 index 00000000000000..f6b2962decdd5f --- /dev/null +++ b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/kernel/ram-disk.c @@ -0,0 +1,235 @@ +/* + * SO2 - Block device drivers lab (#7) + * Linux - Exercise #1, #2, #3, #6 (RAM Disk) + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Simple RAM Disk"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + + +#define KERN_LOG_LEVEL KERN_ALERT + +#define MY_BLOCK_MAJOR 240 +#define MY_BLKDEV_NAME "mybdev" +#define MY_BLOCK_MINORS 1 +#define NR_SECTORS 128 + +#define KERNEL_SECTOR_SIZE 512 + +/* TODO 6/0: use bios for read/write requests */ +#define USE_BIO_TRANSFER 0 + + +static struct my_block_dev { + spinlock_t lock; + struct request_queue *queue; + struct gendisk *gd; + u8 *data; + size_t size; +} g_dev; + +static int my_block_open(struct block_device *bdev, fmode_t mode) +{ + return 0; +} + +static void my_block_release(struct gendisk *gd, fmode_t mode) +{ +} + +static const struct block_device_operations my_block_ops = { + .owner = THIS_MODULE, + .open = my_block_open, + .release = my_block_release +}; + +static void my_block_transfer(struct my_block_dev *dev, sector_t sector, + unsigned long len, char *buffer, int dir) +{ + unsigned long offset = sector * KERNEL_SECTOR_SIZE; + + /* check for read/write beyond end of block device */ + if ((offset + len) > dev->size) + return; + + /* TODO 3/4: read/write to dev buffer depending on dir */ + if (dir == 1) /* write */ + memcpy(dev->data + offset, buffer, len); + else + memcpy(buffer, dev->data + offset, len); +} + +/* to transfer data using bio structures enable USE_BIO_TRANFER */ +#if USE_BIO_TRANSFER == 1 +static void my_xfer_request(struct my_block_dev *dev, struct request *req) +{ + /* TODO 6/10: iterate segments */ + struct bio_vec bvec; + struct req_iterator iter; + + rq_for_each_segment(bvec, req, iter) { + sector_t sector = iter.iter.bi_sector; + unsigned long offset = 
bvec.bv_offset; + size_t len = bvec.bv_len; + int dir = bio_data_dir(iter.bio); + char *buffer = kmap_atomic(bvec.bv_page); + printk(KERN_LOG_LEVEL "%s: buf %8p offset %lu len %zu dir %d\n", __func__, buffer, offset, len, dir); + + /* TODO 6/3: copy bio data to device buffer */ + my_block_transfer(dev, sector, len, buffer + offset, dir); + kunmap_atomic(buffer); + } +} +#endif + +static void my_block_request(struct request_queue *q) +{ + struct request *rq; + struct my_block_dev *dev = q->queuedata; + + while (1) { + + /* TODO 2/3: fetch request */ + rq = blk_fetch_request(q); + if (rq == NULL) + break; + + /* TODO 2/5: check fs request */ + if (blk_rq_is_passthrough(rq)) { + printk(KERN_NOTICE "Skip non-fs request\n"); + __blk_end_request_all(rq, BLK_STS_IOERR); + continue; + } + + /* TODO 2/6: print request information */ + printk(KERN_LOG_LEVEL + "request received: pos=%llu bytes=%u " + "cur_bytes=%u dir=%c\n", + (unsigned long long) blk_rq_pos(rq), + blk_rq_bytes(rq), blk_rq_cur_bytes(rq), + rq_data_dir(rq) ? 
'W' : 'R'); + +#if USE_BIO_TRANSFER == 1 + /* TODO 6/1: process the request by calling my_xfer_request */ + my_xfer_request(dev, rq); +#else + /* TODO 3/3: process the request by calling my_block_transfer */ + my_block_transfer(dev, blk_rq_pos(rq), + blk_rq_bytes(rq), + bio_data(rq->bio), rq_data_dir(rq)); +#endif + + /* TODO 2/1: end request successfully */ + __blk_end_request_all(rq, 0); + } +} + +static int create_block_device(struct my_block_dev *dev) +{ + int err; + + dev->size = NR_SECTORS * KERNEL_SECTOR_SIZE; + dev->data = vmalloc(dev->size); + if (dev->data == NULL) { + printk(KERN_ERR "vmalloc: out of memory\n"); + err = -ENOMEM; + goto out_vmalloc; + } + + /* initialize the I/O queue */ + spin_lock_init(&dev->lock); + dev->queue = blk_init_queue(my_block_request, &dev->lock); + if (dev->queue == NULL) { + printk(KERN_ERR "blk_init_queue: out of memory\n"); + err = -ENOMEM; + goto out_blk_init; + } + blk_queue_logical_block_size(dev->queue, KERNEL_SECTOR_SIZE); + dev->queue->queuedata = dev; + + /* initialize the gendisk structure */ + dev->gd = alloc_disk(MY_BLOCK_MINORS); + if (!dev->gd) { + printk(KERN_ERR "alloc_disk: failure\n"); + err = -ENOMEM; + goto out_alloc_disk; + } + + dev->gd->major = MY_BLOCK_MAJOR; + dev->gd->first_minor = 0; + dev->gd->fops = &my_block_ops; + dev->gd->queue = dev->queue; + dev->gd->private_data = dev; + snprintf(dev->gd->disk_name, DISK_NAME_LEN, "myblock"); + set_capacity(dev->gd, NR_SECTORS); + + add_disk(dev->gd); + + return 0; + +out_alloc_disk: + blk_cleanup_queue(dev->queue); +out_blk_init: + vfree(dev->data); +out_vmalloc: + return err; +} + +static int __init my_block_init(void) +{ + int err = 0; + + /* TODO 1/5: register block device */ + err = register_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME); + if (err < 0) { + printk(KERN_ERR "register_blkdev: unable to register\n"); + return err; + } + + /* TODO 2/3: create block device using create_block_device */ + err = create_block_device(&g_dev); + if (err < 0) + goto out; 
+ + return 0; + +out: + /* TODO 1/1: unregister block device in case of an error */ + unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME); + return err; +} + +static void delete_block_device(struct my_block_dev *dev) +{ + if (dev->gd) { + del_gendisk(dev->gd); + put_disk(dev->gd); + } + if (dev->queue) + blk_cleanup_queue(dev->queue); + if (dev->data) + vfree(dev->data); +} + +static void __exit my_block_exit(void) +{ + /* TODO 2/1: cleanup block device using delete_block_device */ + delete_block_device(&g_dev); + + /* TODO 1/1: unregister block device */ + unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME); +} + +module_init(my_block_init); +module_exit(my_block_exit); diff --git a/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/user/.gitignore b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/user/.gitignore new file mode 100644 index 00000000000000..1330ce0fdf63c7 --- /dev/null +++ b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/user/.gitignore @@ -0,0 +1 @@ +/ram-disk-test diff --git a/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/user/Makefile b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/user/Makefile new file mode 100644 index 00000000000000..a653ce1e2faf79 --- /dev/null +++ b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/user/Makefile @@ -0,0 +1,8 @@ +CFLAGS = -Wall -g -m32 -static + +all: ram-disk-test + +.PHONY: clean + +clean: + -rm -f *~ *.o ram-disk-test diff --git a/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/user/ram-disk-test.c b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/user/ram-disk-test.c new file mode 100644 index 00000000000000..f9a677244cb9c1 --- /dev/null +++ b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/user/ram-disk-test.c @@ -0,0 +1,85 @@ +/* + * SO2 - Block device driver (#8) + * Test suite for exercise #3 (RAM Disk) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define 
NR_SECTORS 128 +#define SECTOR_SIZE 512 + +#define DEVICE_NAME "/dev/myblock" +#define MODULE_NAME "ram-disk" +#define MY_BLOCK_MAJOR "240" +#define MY_BLOCK_MINOR "0" + + +#define max_elem_value(elem) \ + (1 << 8*sizeof(elem)) + +static unsigned char buffer[SECTOR_SIZE]; +static unsigned char buffer_copy[SECTOR_SIZE]; + +static void test_sector(int fd, size_t sector) /* write random bytes to one sector, read them back, print passed/failed */ +{ + int i; + + for (i = 0; i < sizeof(buffer) / sizeof(buffer[0]); i++) + buffer[i] = rand() % max_elem_value(buffer[0]); + + lseek(fd, sector * SECTOR_SIZE, SEEK_SET); + write(fd, buffer, sizeof(buffer)); + + fsync(fd); + + lseek(fd, sector * SECTOR_SIZE, SEEK_SET); + read(fd, buffer_copy, sizeof(buffer_copy)); + + printf("test sector %3zu ... ", sector); /* sector is a size_t, so %zu rather than %d */ + if (memcmp(buffer, buffer_copy, sizeof(buffer_copy)) == 0) + printf("passed\n"); + else + printf("failed\n"); +} + +int main(void) +{ + int fd; + size_t i; + int back_errno; + + printf("insmod ../kernel/" MODULE_NAME ".ko\n"); + system("insmod ../kernel/" MODULE_NAME ".ko\n"); + sleep(1); + + printf("mknod " DEVICE_NAME " b " MY_BLOCK_MAJOR " " MY_BLOCK_MINOR "\n"); + system("mknod " DEVICE_NAME " b " MY_BLOCK_MAJOR " " MY_BLOCK_MINOR "\n"); + sleep(1); + + fd = open(DEVICE_NAME, O_RDWR); + if (fd < 0) { + back_errno = errno; + perror("open"); + fprintf(stderr, "errno is %d\n", back_errno); + exit(EXIT_FAILURE); + } + + srand(time(NULL)); + for (i = 0; i < NR_SECTORS; i++) + test_sector(fd, i); + + close(fd); + + sleep(1); + printf("rmmod " MODULE_NAME "\n"); + system("rmmod " MODULE_NAME "\n"); + + return 0; +} diff --git a/tools/labs/templates/block_device_drivers/4-5-relay-disk/Kbuild b/tools/labs/templates/block_device_drivers/4-5-relay-disk/Kbuild new file mode 100644 index 00000000000000..222ee815adcb9e --- /dev/null +++ b/tools/labs/templates/block_device_drivers/4-5-relay-disk/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g -Wno-unused + +obj-m = relay-disk.o diff --git 
a/tools/labs/templates/block_device_drivers/4-5-relay-disk/relay-disk.c b/tools/labs/templates/block_device_drivers/4-5-relay-disk/relay-disk.c new file mode 100644 index 00000000000000..a4f19570e3fafa --- /dev/null +++ b/tools/labs/templates/block_device_drivers/4-5-relay-disk/relay-disk.c @@ -0,0 +1,106 @@ +/* + * SO2 Lab - Block device drivers (#7) + * Linux - Exercise #4, #5 (Relay disk - bio) + */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("SO2"); +MODULE_DESCRIPTION("Relay disk"); +MODULE_LICENSE("GPL"); + +#define KERN_LOG_LEVEL KERN_ALERT + +#define PHYSICAL_DISK_NAME "/dev/vdb" +#define KERNEL_SECTOR_SIZE 512 + +#define BIO_WRITE_MESSAGE "def" + + +/* pointer to physical device structure */ +static struct block_device *phys_bdev; + +static void send_test_bio(struct block_device *bdev, int dir) +{ + struct bio *bio = bio_alloc(GFP_NOIO, 1); + struct page *page; + char *buf; + + /* TODO 4/3: fill bio (bdev, sector, direction) */ + bio->bi_disk = bdev->bd_disk; + bio->bi_iter.bi_sector = 0; + bio->bi_opf = dir; + + page = alloc_page(GFP_NOIO); + bio_add_page(bio, page, KERNEL_SECTOR_SIZE, 0); + + /* TODO 5/5: write message to bio buffer if direction is write */ + if (dir == REQ_OP_WRITE) { + buf = kmap_atomic(page); + memcpy(buf, BIO_WRITE_MESSAGE, strlen(BIO_WRITE_MESSAGE)); + kunmap_atomic(buf); + } + + /* TODO 4/3: submit bio and wait for completion */ + printk(KERN_LOG_LEVEL "[send_test_bio] Submiting bio\n"); + submit_bio_wait(bio); + printk(KERN_LOG_LEVEL "[send_test_bio] Done bio\n"); + + /* TODO 4/3: read data (first 3 bytes) from bio buffer and print it */ + buf = kmap_atomic(page); + printk(KERN_LOG_LEVEL "read %02x %02x %02x\n", buf[0], buf[1], buf[2]); + kunmap_atomic(buf); + + bio_put(bio); + __free_page(page); +} + +static struct block_device *open_disk(char *name) +{ + struct block_device *bdev; + + /* TODO 4/5: get block device in exclusive mode */ + bdev = blkdev_get_by_path(name, FMODE_READ | 
FMODE_WRITE | FMODE_EXCL, THIS_MODULE); + if (IS_ERR(bdev)) { + printk(KERN_ERR "blkdev_get_by_path\n"); + return NULL; + } + + return bdev; +} + +static int __init relay_init(void) +{ + phys_bdev = open_disk(PHYSICAL_DISK_NAME); + if (phys_bdev == NULL) { + printk(KERN_ERR "[relay_init] No such device\n"); + return -EINVAL; + } + + send_test_bio(phys_bdev, REQ_OP_READ); + + return 0; +} + +static void close_disk(struct block_device *bdev) +{ + /* TODO 4/1: put block device */ + blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); +} + +static void __exit relay_exit(void) +{ + /* TODO 5/1: send test write bio */ + send_test_bio(phys_bdev, REQ_OP_WRITE); + + close_disk(phys_bdev); +} + +module_init(relay_init); +module_exit(relay_exit); diff --git a/tools/labs/templates/block_device_drivers/4-5-relay-disk/test-relay-disk b/tools/labs/templates/block_device_drivers/4-5-relay-disk/test-relay-disk new file mode 100755 index 00000000000000..23933e853a1773 --- /dev/null +++ b/tools/labs/templates/block_device_drivers/4-5-relay-disk/test-relay-disk @@ -0,0 +1,12 @@ +#!/bin/sh + +PHYSICAL_DISK_NAME="/dev/vdb" +TMP_FILE="/tmp/disk_data" +echo "abc" > "$PHYSICAL_DISK_NAME" +insmod relay-disk.ko +rmmod relay-disk +sleep 1 + +echo -n "read from $PHYSICAL_DISK_NAME: " +dd if=$PHYSICAL_DISK_NAME of=$TMP_FILE count=3 bs=1 > /dev/null 2>&1 # POSIX form; '&>' is a bashism that plain sh reads as "background, then redirect" +cat $TMP_FILE | hexdump -v -e '/1 "%02X "'; echo diff --git a/tools/labs/templates/debugging/dumpstack/Kbuild b/tools/labs/templates/debugging/dumpstack/Kbuild new file mode 100644 index 00000000000000..6e5dcf0a1c349a --- /dev/null +++ b/tools/labs/templates/debugging/dumpstack/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = dumpstack.o diff --git a/tools/labs/templates/debugging/dumpstack/dumpstack.c b/tools/labs/templates/debugging/dumpstack/dumpstack.c new file mode 100644 index 00000000000000..d5a1c1a47dfeb7 --- /dev/null +++ b/tools/labs/templates/debugging/dumpstack/dumpstack.c @@ 
-0,0 +1,36 @@ +#include + +static noinline void foo3(void) +{ + pr_info("foo3()\n"); + dump_stack(); +} + +static noinline void foo2(void) +{ + pr_info("foo2()\n"); + foo3(); +} + +static noinline void foo1(void) +{ + pr_info("foo1()\n"); + foo2(); +} + +static int so2_dumpstack_init(void) +{ + pr_info("dumpstack_init\n"); + foo1(); + + return 0; +} + +static void so2_dumpstack_exit(void) +{ + pr_info("dumpstack exit\n"); +} + +MODULE_LICENSE("GPL v2"); +module_init(so2_dumpstack_init); +module_exit(so2_dumpstack_exit); diff --git a/tools/labs/templates/debugging/leak/Kbuild b/tools/labs/templates/debugging/leak/Kbuild new file mode 100644 index 00000000000000..bf0dc5216a8434 --- /dev/null +++ b/tools/labs/templates/debugging/leak/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = leak.o diff --git a/tools/labs/templates/debugging/leak/leak.c b/tools/labs/templates/debugging/leak/leak.c new file mode 100644 index 00000000000000..26ab404f255ff2 --- /dev/null +++ b/tools/labs/templates/debugging/leak/leak.c @@ -0,0 +1,23 @@ +#include +#include +#include +#include +#include +#include + +static int leak_init(void) +{ + pr_info("%s\n", __func__); + + (void)kmalloc(16, GFP_KERNEL); + + return 0; +} + +static void leak_exit(void) +{ +} + +MODULE_LICENSE("GPL v2"); +module_init(leak_init); +module_exit(leak_exit); diff --git a/tools/labs/templates/debugging/list/Kbuild b/tools/labs/templates/debugging/list/Kbuild new file mode 100644 index 00000000000000..7187139dbdb7af --- /dev/null +++ b/tools/labs/templates/debugging/list/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = list.o diff --git a/tools/labs/templates/debugging/list/list.c b/tools/labs/templates/debugging/list/list.c new file mode 100644 index 00000000000000..ce76e3510ef203 --- /dev/null +++ b/tools/labs/templates/debugging/list/list.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include 
+#include +#include + +static char *op = "ubi"; + +module_param(op, charp, 0000); +MODULE_PARM_DESC(op, "List error type"); + +struct list_m { + int a; + struct list_head list; +}; + +LIST_HEAD(head); + +static noinline void use_before_init(void) +{ + struct list_m *m = kmalloc(sizeof(*m), GFP_KERNEL); + + pr_info("use before init\n"); + + list_del(&m->list); +} + +static noinline void use_after_free(void) +{ + struct list_m *m = kmalloc(sizeof(*m), GFP_KERNEL); + + pr_info("use after free\n"); + + kfree(m); + list_del(&m->list); +} + +static noinline void crush(void) +{ + struct list_m e1, e2; + struct list_head *i; + + e1.a = 2; + e2.a = 3; + + list_add(&e1.list, &head); + list_add(&e2.list, &head); + + list_for_each(i, &head) { + struct list_m *x = list_entry(i, struct list_m, list); + + pr_info("list_for each %p\n", &x->a); + list_del(&x->list); + } +} + +static int so2_list_init(void) +{ + pr_info("list_init with op %s\n", op); + + /* use before init */ + if (strncmp(op, "ubi", 3) == 0) + use_before_init(); + if (strncmp(op, "uaf", 3) == 0) + use_after_free(); + if (strncmp(op, "crush", 5) == 0) + crush(); + + return 0; +} + +static void so2_list_exit(void) +{ +} + +MODULE_LICENSE("GPL v2"); +module_init(so2_list_init); +module_exit(so2_list_exit); diff --git a/tools/labs/templates/debugging/locking/Kbuild b/tools/labs/templates/debugging/locking/Kbuild new file mode 100644 index 00000000000000..4656b8c1ac6c62 --- /dev/null +++ b/tools/labs/templates/debugging/locking/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = locking.o diff --git a/tools/labs/templates/debugging/locking/locking.c b/tools/labs/templates/debugging/locking/locking.c new file mode 100644 index 00000000000000..12a11511e91fe0 --- /dev/null +++ b/tools/labs/templates/debugging/locking/locking.c @@ -0,0 +1,42 @@ +#include +#include + +static DEFINE_MUTEX(a); +static DEFINE_MUTEX(b); + +static noinline int thread_a(void *unused) +{ + 
mutex_lock(&a); pr_info("%s acquired A\n", __func__); + mutex_lock(&b); pr_info("%s acquired B\n", __func__); + + mutex_unlock(&b); + mutex_unlock(&a); + + return 0; +} + +static noinline int thread_b(void *unused) +{ + mutex_lock(&b); pr_info("%s acquired B\n", __func__); + mutex_lock(&a); pr_info("%s acquired A\n", __func__); + + mutex_unlock(&a); + mutex_unlock(&b); + + return 0; +} + + +int init_module(void) +{ + kthread_run(thread_a, NULL, "thread_a"); + kthread_run(thread_b, NULL, "thread_b"); + + return 0; +} + +void exit_module(void) +{ +} + +MODULE_LICENSE("GPL v2"); diff --git a/tools/labs/templates/debugging/locking2/Kbuild b/tools/labs/templates/debugging/locking2/Kbuild new file mode 100644 index 00000000000000..639f09cc298826 --- /dev/null +++ b/tools/labs/templates/debugging/locking2/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = locking2.o diff --git a/tools/labs/templates/debugging/locking2/locking2.c b/tools/labs/templates/debugging/locking2/locking2.c new file mode 100644 index 00000000000000..0905dcb0830192 --- /dev/null +++ b/tools/labs/templates/debugging/locking2/locking2.c @@ -0,0 +1,30 @@ +#include +#include + +static DEFINE_SPINLOCK(lock); + +static void timerfn(struct timer_list *unused) +{ + pr_info("%s acquiring lock\n", __func__); + spin_lock(&lock); pr_info("%s acquired lock\n", __func__); + spin_unlock(&lock); pr_info("%s released lock\n", __func__); +} + +static DEFINE_TIMER(timer, timerfn); + +int init_module(void) +{ + mod_timer(&timer, jiffies); + + pr_info("%s acquiring lock\n", __func__); + spin_lock(&lock); pr_info("%s acquired lock\n", __func__); + spin_unlock(&lock); pr_info("%s released lock\n", __func__); + return 0; +} + +void exit_module(void) +{ + del_timer_sync(&timer); +} + +MODULE_LICENSE("GPL v2"); diff --git a/tools/labs/templates/debugging/oops/Kbuild b/tools/labs/templates/debugging/oops/Kbuild new file mode 100644 index 00000000000000..3916b6c818523b 
--- /dev/null +++ b/tools/labs/templates/debugging/oops/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = oops.o diff --git a/tools/labs/templates/debugging/oops/oops.c b/tools/labs/templates/debugging/oops/oops.c new file mode 100644 index 00000000000000..26390a510a9904 --- /dev/null +++ b/tools/labs/templates/debugging/oops/oops.c @@ -0,0 +1,23 @@ +#include + +static noinline void do_oops(void) +{ + *(int*)0x42 = 'a'; +} + +static int so2_oops_init(void) +{ + pr_info("oops_init\n"); + do_oops(); + + return 0; +} + +static void so2_oops_exit(void) +{ + pr_info("oops exit\n"); +} + +MODULE_LICENSE("GPL v2"); +module_init(so2_oops_init); +module_exit(so2_oops_exit); diff --git a/tools/labs/templates/debugging/panic/Kbuild b/tools/labs/templates/debugging/panic/Kbuild new file mode 100644 index 00000000000000..cefb5c9f0de161 --- /dev/null +++ b/tools/labs/templates/debugging/panic/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = panic.o diff --git a/tools/labs/templates/debugging/panic/panic.c b/tools/labs/templates/debugging/panic/panic.c new file mode 100644 index 00000000000000..440ead94d15d2e --- /dev/null +++ b/tools/labs/templates/debugging/panic/panic.c @@ -0,0 +1,29 @@ +#include +#include + +static struct timer_list panic_timer; + +static void do_panic(struct timer_list *unused) +{ + *(int*)0x42 = 'a'; +} + +static int so2_panic_init(void) +{ + pr_info("panic_init\n"); + + timer_setup(&panic_timer, do_panic, 0); + mod_timer(&panic_timer, jiffies + 2 * HZ); + + return 0; +} + +static void so2_panic_exit(void) +{ + pr_info("panic exit\n"); + del_timer_sync(&panic_timer); +} + +MODULE_LICENSE("GPL v2"); +module_init(so2_panic_init); +module_exit(so2_panic_exit); diff --git a/tools/labs/templates/deferred_work/1-2-timer/Kbuild b/tools/labs/templates/deferred_work/1-2-timer/Kbuild new file mode 100644 index 00000000000000..fa3cd3e263f84b --- /dev/null 
+++ b/tools/labs/templates/deferred_work/1-2-timer/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = timer.o diff --git a/tools/labs/templates/deferred_work/1-2-timer/timer.c b/tools/labs/templates/deferred_work/1-2-timer/timer.c new file mode 100644 index 00000000000000..eba76bffc9135d --- /dev/null +++ b/tools/labs/templates/deferred_work/1-2-timer/timer.c @@ -0,0 +1,54 @@ +/* + * Deferred Work + * + * Exercise #1, #2: simple timer + */ + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Simple kernel timer"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +#define TIMER_TIMEOUT 1 + +static struct timer_list timer; + +static void timer_handler(struct timer_list *tl) /* timer callback: logs the elapsed seconds, then re-arms the timer */ +{ + /* TODO 1/4: print a message */ + static size_t nseconds; + + nseconds += TIMER_TIMEOUT; + pr_info("[timer_handler] nseconds = %zu\n", nseconds); /* nseconds is a size_t, so %zu rather than %d */ + + /* TODO 2: reschedule timer */ + mod_timer(tl, jiffies + TIMER_TIMEOUT * HZ); +} + +static int __init timer_init(void) +{ + pr_info("[timer_init] Init module\n"); + + /* TODO 1: initialize timer */ + timer_setup(&timer, timer_handler, 0); + + /* TODO 1: schedule timer for the first time */ + mod_timer(&timer, jiffies + TIMER_TIMEOUT * HZ); + + return 0; +} + +static void __exit timer_exit(void) +{ + pr_info("[timer_exit] Exit module\n"); + + /* TODO 1: cleanup; make sure the timer is not running after we exit */ + del_timer_sync(&timer); +} + +module_init(timer_init); +module_exit(timer_exit); diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/include/deferred.h b/tools/labs/templates/deferred_work/3-4-5-deferred/include/deferred.h new file mode 100644 index 00000000000000..f9408c704401ad --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/include/deferred.h @@ -0,0 +1,35 @@ +/* + * SO2 - Lab 6 - Deferred Work + * + * Exercises #3, #4, #5: deferred work + * + * Header file. 
+ */ + +#ifndef __DEFERRED_H__ +#define __DEFERRED_H__ + +#include + +#define MY_IOCTL_TIMER_SET _IOW('k', 1, unsigned long) +#define MY_IOCTL_TIMER_CANCEL _IO ('k', 2) +#define MY_IOCTL_TIMER_ALLOC _IOW('k', 3, unsigned long) +#define MY_IOCTL_TIMER_MON _IO ('k', 4) + +/* converts ioctl command code to message */ +inline static char *ioctl_command_to_string(int cmd) +{ + switch(cmd) { + case MY_IOCTL_TIMER_SET: + return "Set timer"; + case MY_IOCTL_TIMER_CANCEL: + return "Cancel timer"; + case MY_IOCTL_TIMER_ALLOC: + return "Allocate memory"; + case MY_IOCTL_TIMER_MON: + return "Monitor pid"; + } + return "Unknown command"; +} + +#endif /* __DEFERRED_H__ */ diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/Kbuild b/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/Kbuild new file mode 100644 index 00000000000000..fa3f727c8a53d2 --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = deferred.o diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/deferred.c b/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/deferred.c new file mode 100644 index 00000000000000..97775aad9baeee --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/deferred.c @@ -0,0 +1,260 @@ +/* + * SO2 - Lab 6 - Deferred Work + * + * Exercises #3, #4, #5: deferred work + * + * Code skeleton. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../include/deferred.h" + +#define MY_MAJOR 42 +#define MY_MINOR 0 +#define MODULE_NAME "deferred" + +#define TIMER_TYPE_NONE -1 +#define TIMER_TYPE_SET 0 +#define TIMER_TYPE_ALLOC 1 +#define TIMER_TYPE_MON 2 + +MODULE_DESCRIPTION("Deferred work character device"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +struct mon_proc { + struct task_struct *task; + struct list_head list; +}; + +static struct my_device_data { + struct cdev cdev; + /* TODO 1: add timer */ + struct timer_list timer; + /* TODO 2: add flag */ + int flag; + /* TODO 3: add work */ + struct work_struct work; + /* TODO 4: add list for monitored processes */ + struct list_head list; + /* TODO 4: add spinlock to protect list */ + spinlock_t lock; +} dev; + +static void alloc_io(void) +{ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(5 * HZ); + pr_info("Yawn! I've been sleeping for 5 seconds.\n"); +} + +static struct mon_proc *get_proc(pid_t pid) /* returns a new mon_proc holding a reference on pid's task, or ERR_PTR(-ESRCH / -ENOMEM) */ +{ + struct task_struct *task; + struct mon_proc *p; + + task = pid_task(find_vpid(pid), PIDTYPE_PID); + if (!task) + return ERR_PTR(-ESRCH); + + p = kmalloc(sizeof(*p), GFP_ATOMIC); /* size of the struct, not of the pointer: sizeof(p) under-allocated and overflowed */ + if (!p) + return ERR_PTR(-ENOMEM); + + get_task_struct(task); + p->task = task; + + return p; +} + + +/* TODO 3/4: define work handler */ +static void work_handler(struct work_struct *work) +{ + alloc_io(); +} + +#define ALLOC_IO_DIRECT +/* TODO 3: undef ALLOC_IO_DIRECT*/ +#undef ALLOC_IO_DIRECT + +static void timer_handler(struct timer_list *tl) +{ + /* TODO 1/44: implement timer handler */ + struct my_device_data *my_data = from_timer(my_data, tl, timer); + + pr_info("[timer_handler] pid = %d, comm = %s\n", + current->pid, current->comm); + + /* TODO 2/38: check flags: TIMER_TYPE_SET or TIMER_TYPE_ALLOC */ + switch (my_data->flag) { + case TIMER_TYPE_SET: + break; + case TIMER_TYPE_ALLOC: +#ifdef ALLOC_IO_DIRECT + alloc_io(); +#else + /* TODO 3: schedule 
work */ + schedule_work(&my_data->work); +#endif + break; + case TIMER_TYPE_MON: + { + /* TODO 4/19: iterate the list and check the proccess state */ + struct mon_proc *p, *n; + + spin_lock(&my_data->lock); + list_for_each_entry_safe(p, n, &my_data->list, list) { + /* TODO 4: if task is dead print info ... */ + /* TODO 4: ... decrement task usage counter ... */ + /* TODO 4: ... remove it from the list ... */ + /* TODO 4: ... free the struct mon_proc */ + if (p->task->state == TASK_DEAD) { + pr_info("task %s (%d) is dead\n", p->task->comm, + p->task->pid); + put_task_struct(p->task); + list_del(&p->list); + kfree(p); + } + } + spin_unlock(&my_data->lock); + + mod_timer(&my_data->timer, jiffies + HZ); + break; + } + default: + break; + } +} + +static int deferred_open(struct inode *inode, struct file *file) +{ + struct my_device_data *my_data = + container_of(inode->i_cdev, struct my_device_data, cdev); + file->private_data = my_data; + pr_info("[deferred_open] Device opened\n"); + return 0; +} + +static int deferred_release(struct inode *inode, struct file *file) +{ + pr_info("[deferred_release] Device released\n"); + return 0; +} + +static long deferred_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct my_device_data *my_data = (struct my_device_data*) file->private_data; + + pr_info("[deferred_ioctl] Command: %s\n", ioctl_command_to_string(cmd)); + + switch (cmd) { + case MY_IOCTL_TIMER_SET: + /* TODO 2: set flag */ + my_data->flag = TIMER_TYPE_SET; + /* TODO 1: schedule timer */ + mod_timer(&my_data->timer, jiffies + arg * HZ); + break; + case MY_IOCTL_TIMER_CANCEL: + /* TODO 1: cancel timer */ + del_timer(&my_data->timer); + break; + case MY_IOCTL_TIMER_ALLOC: + /* TODO 2/2: set flag and schedule timer */ + my_data->flag = TIMER_TYPE_ALLOC; + mod_timer(&my_data->timer, jiffies + arg * HZ); + break; + case MY_IOCTL_TIMER_MON: + { + /* TODO 4/8: use get_proc() and add task to list */ + struct mon_proc *p = get_proc(arg); + if (IS_ERR(p)) + 
return PTR_ERR(p); + + /* TODO 4: protect access to list */ + spin_lock_bh(&my_data->lock); + list_add(&p->list, &my_data->list); + spin_unlock_bh(&my_data->lock); + + /* TODO 4/2: set flag and schedule timer */ + my_data->flag = TIMER_TYPE_MON; + mod_timer(&my_data->timer, jiffies + HZ); + break; + } + default: + return -ENOTTY; + } + return 0; +} + +struct file_operations my_fops = { + .owner = THIS_MODULE, + .open = deferred_open, + .release = deferred_release, + .unlocked_ioctl = deferred_ioctl, +}; + +static int deferred_init(void) +{ + int err; + + pr_info("[deferred_init] Init module\n"); + err = register_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), 1, MODULE_NAME); + if (err) { + pr_info("[deffered_init] register_chrdev_region: %d\n", err); + return err; + } + + /* TODO 2: Initialize flag. */ + dev.flag = TIMER_TYPE_NONE; + /* TODO 3: Initialize work. */ + INIT_WORK(&dev.work, work_handler); + + /* TODO 4/2: Initialize lock and list. */ + spin_lock_init(&dev.lock); + INIT_LIST_HEAD(&dev.list); + + cdev_init(&dev.cdev, &my_fops); + cdev_add(&dev.cdev, MKDEV(MY_MAJOR, MY_MINOR), 1); + + /* TODO 1: Initialize timer. */ + timer_setup(&dev.timer, timer_handler, 0); + + return 0; +} + +static void deferred_exit(void) +{ + struct mon_proc *p, *n; + + pr_info("[deferred_exit] Exit module\n" ); + + cdev_del(&dev.cdev); + unregister_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), 1); + + /* TODO 1: Cleanup: make sure the timer is not running after exiting. */ + del_timer_sync(&dev.timer); + /* TODO 3: Cleanup: make sure the work handler is not scheduled. */ + flush_scheduled_work(); + + /* TODO 4/8: Cleanup the monitered process list */ + list_for_each_entry_safe(p, n, &dev.list, list) { + /* TODO 4: ... decrement task usage counter ... */ + /* TODO 4: ... remove it from the list ... */ + /* TODO 4: ... 
free the struct mon_proc */ + put_task_struct(p->task); + list_del(&p->list); + kfree(p); + } +} + +module_init(deferred_init); +module_exit(deferred_exit); diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/makenode b/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/makenode new file mode 100755 index 00000000000000..1e46669d4a1c98 --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/makenode @@ -0,0 +1,9 @@ +#!/bin/sh + +device=deferred +type=c +major=42 +minor=0 + +rm -f /dev/${device} +mknod /dev/${device} $type $major $minor && ls -al /dev/${device} diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/user/.gitignore b/tools/labs/templates/deferred_work/3-4-5-deferred/user/.gitignore new file mode 100644 index 00000000000000..ee4c92682341e4 --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/user/.gitignore @@ -0,0 +1 @@ +/test diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/user/Makefile b/tools/labs/templates/deferred_work/3-4-5-deferred/user/Makefile new file mode 100644 index 00000000000000..62768622c42fee --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/user/Makefile @@ -0,0 +1,9 @@ +CFLAGS=-Wall -m32 +LDFLAGS=-static -m32 + +test: test.o + +.PHONY: clean + +clean: + -rm -f *~ *.o test diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/user/test.c b/tools/labs/templates/deferred_work/3-4-5-deferred/user/test.c new file mode 100644 index 00000000000000..3cef70f86e2b75 --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/user/test.c @@ -0,0 +1,93 @@ +/* + * SO2 - Lab 6 - Deferred Work + * + * Exercises #3, #4, #5: deferred work + * + * User-mode test program. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "../include/deferred.h" + +#define DEVICE_PATH "/dev/deferred" + +/* prints error message and exits */ +void error(char *message) +{ + perror(message); + exit(EXIT_FAILURE); +} + +/* prints usage message and exits */ +void usage() +{ + printf("Usage: test \n options:\n" + "\ts - set timer to run after seconds\n" + "\tc - cancel timer\n" + "\ta - allocate memory after seconds\n" + "\tp - monitor pid\n" + "\n"); + exit(1); +} + +#define BUFFER_LEN 128 + +int main(int argc, char **argv) +{ + int fd; + unsigned long seconds, pid; + + if (argc < 2) + usage(); + + fd = open(DEVICE_PATH, O_RDONLY); + if (fd < 0) + error(DEVICE_PATH); + + switch (argv[1][0]) { + case 's': + /* Set timer. */ + if (argc < 3) + usage(); + seconds = atoi(argv[2]); + printf("Set timer to %ld seconds\n", seconds); + if (ioctl(fd, MY_IOCTL_TIMER_SET, seconds) < 0) + error("ioctl set timer error"); + break; + case 'c': + /* Cancel timer. */ + printf("Cancel timer\n"); + if (ioctl(fd, MY_IOCTL_TIMER_CANCEL) < 0) + error("ioctl cancel timer error"); + break; + case 'a': + /* Allocate memory. */ + if (argc < 3) + usage(); + seconds = atoi(argv[2]); + printf("Allocate memory after %ld seconds\n",seconds); + if (ioctl(fd, MY_IOCTL_TIMER_ALLOC, seconds) < 0) + error("ioctl allocate memory error"); + break; + case 'p': + /* Monitor pid. 
*/ + if (argc < 3) + usage(); + pid = atoi(argv[2]); + printf("Monitor PID %lu.\n", pid); + if (ioctl(fd, MY_IOCTL_TIMER_MON, pid) < 0) + error("ioctl monitor pid error"); + break; + default: + error("Wrong parameter"); + } + + close(fd); + + return 0; +} diff --git a/tools/labs/templates/deferred_work/6-kthread/Kbuild b/tools/labs/templates/deferred_work/6-kthread/Kbuild new file mode 100644 index 00000000000000..028c060071dd8c --- /dev/null +++ b/tools/labs/templates/deferred_work/6-kthread/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = kthread.o diff --git a/tools/labs/templates/deferred_work/6-kthread/kthread.c b/tools/labs/templates/deferred_work/6-kthread/kthread.c new file mode 100644 index 00000000000000..d47ce9619dce06 --- /dev/null +++ b/tools/labs/templates/deferred_work/6-kthread/kthread.c @@ -0,0 +1,65 @@ +/* + * SO2 - Lab 6 - Deferred Work + * + * Exercise #6: kernel thread + */ + +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Simple kernel thread"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +wait_queue_head_t wq_stop_thread; +atomic_t flag_stop_thread; +wait_queue_head_t wq_thread_terminated; +atomic_t flag_thread_terminated; + + +int my_thread_f(void *data) +{ + pr_info("[my_thread_f] Current process id is %d (%s)\n", + current->pid, current->comm); + /* TODO: Wait for command to remove module on wq_stop_thread queue. 
*/ + wait_event_interruptible(wq_stop_thread, atomic_read(&flag_stop_thread) != 0); + + /* TODO: set flag to mark kernel thread termination */ + atomic_set(&flag_thread_terminated, 1); + /* TODO: notify the unload process that we have exited */ + wake_up_interruptible(&wq_thread_terminated); + pr_info("[my_thread_f] Exiting\n"); + do_exit(0); +} + +static int __init kthread_init(void) +{ + pr_info("[kthread_init] Init module\n"); + + /* TODO/4: init the waitqueues and flags */ + init_waitqueue_head(&wq_stop_thread); + atomic_set(&flag_stop_thread, 0); + init_waitqueue_head(&wq_thread_terminated); + atomic_set(&flag_thread_terminated, 0); + /* TODO: create and start the kernel thread */ + kthread_run(my_thread_f, NULL, "%skthread%d", "my", 0); + + return 0; +} + +static void __exit kthread_exit(void) +{ + /* TODO/2: notify the kernel thread that its time to exit */ + atomic_set(&flag_stop_thread, 1); + wake_up_interruptible(&wq_stop_thread); + /* TODO: wait for the kernel thread to exit */ + wait_event_interruptible(wq_thread_terminated, atomic_read(&flag_thread_terminated) != 0); + pr_info("[kthread_exit] Exit module\n"); +} + +module_init(kthread_init); +module_exit(kthread_exit); diff --git a/tools/labs/templates/device_drivers/extra/char-driver-lin/Kbuild b/tools/labs/templates/device_drivers/extra/char-driver-lin/Kbuild new file mode 100644 index 00000000000000..9399052a035415 --- /dev/null +++ b/tools/labs/templates/device_drivers/extra/char-driver-lin/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = modul.o diff --git a/tools/labs/templates/device_drivers/extra/char-driver-lin/makenode b/tools/labs/templates/device_drivers/extra/char-driver-lin/makenode new file mode 100755 index 00000000000000..e66abd3c6a1cd0 --- /dev/null +++ b/tools/labs/templates/device_drivers/extra/char-driver-lin/makenode @@ -0,0 +1,9 @@ +#!/bin/sh +device="my_device" +major=42 + + +rm -f /dev/${device} +mknod /dev/${device} c 
$major 0 + + diff --git a/tools/labs/templates/device_drivers/extra/char-driver-lin/modul.c b/tools/labs/templates/device_drivers/extra/char-driver-lin/modul.c new file mode 100644 index 00000000000000..b92e74fabfded6 --- /dev/null +++ b/tools/labs/templates/device_drivers/extra/char-driver-lin/modul.c @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include +#include + +#define MY_MAJOR 42 +#define MY_MAX_MINORS 2 +/* #define IOCTL_IN _IOC(_IOC_WRITE, 'k', 1, sizeof(my_ioctl_data)) */ +#define MY_IOCTL_IN _IOC(_IOC_WRITE, 'k', 1, 0) + +struct my_device_data { + struct cdev cdev; + /* my data starts here */ +}devs[MY_MAX_MINORS]; + +MODULE_DESCRIPTION("My kernel module"); +MODULE_AUTHOR("Me"); +MODULE_LICENSE("GPL"); + +static int my_open(struct inode *inode, struct file *file) { + struct my_device_data *my_data = + container_of(inode->i_cdev, struct my_device_data, cdev); + + printk( KERN_DEBUG "[my_open]\n" ); + /* validate access to device */ + file->private_data = my_data; + /* initialize device */ + + return 0; +} + +static int my_close(struct inode *inode, struct file *file) { + printk( KERN_DEBUG "[my_close]\n" ); + /* deinitialize device */ + return 0; +} + +static int my_read(struct file *file, char __user *user_buffer, size_t size, loff_t *offset) { + struct my_device_data *my_data = + (struct my_device_data*) file->private_data; + int sizeRead = 0; + + printk( KERN_DEBUG "[my_read]\n" ); + /* read data from device in my_data->buffer */ + /* if(copy_to_user(user_buffer, my_data->buffer, my_data->size)) + return -EFAULT; */ + + return sizeRead; +} + +static int my_write(struct file *file, const char __user *user_buffer, size_t size, loff_t *offset) { + struct my_device_data *my_data = + (struct my_device_data*) file->private_data; + int sizeWritten = 0; + + printk( KERN_DEBUG "[my_write]\n" ); + /* copy_from_user */ + /* write data to device from my_data->buffer */ + sizeWritten = size; //only if sizeWritten == size ! 
+ + return sizeWritten; +} + +static long my_ioctl (struct file *file, unsigned int cmd, unsigned long arg) { + struct my_device_data *my_data = + (struct my_device_data*) file->private_data; + /* my_ioctl_data mid; */ + + printk( KERN_DEBUG "[my_ioctl]\n" ); + switch(cmd) { + case MY_IOCTL_IN: + /* if( copy_from_user(&mid, (my_ioctl_data *) arg, sizeof(my_ioctl_data)) ) + return -EFAULT; */ + + /* process data and execute command */ + break; + default: + return -ENOTTY; + } + return 0; +} + + +struct file_operations my_fops = { + .owner = THIS_MODULE, + .open = my_open, + .read = my_read, + .write = my_write, + .release = my_close, + .unlocked_ioctl = my_ioctl +}; + +int init_module(void) { + int i, err; + + printk( KERN_DEBUG "[init_module]\n" ); + err = register_chrdev_region(MKDEV(MY_MAJOR, 0), MY_MAX_MINORS,"my_device_driver"); + if (err != 0) { + /* report error */ + return err; + } + + for(i = 0; i < MY_MAX_MINORS; i++) { + /* initialize devs[i] fields */ + cdev_init(&devs[i].cdev, &my_fops); + cdev_add(&devs[i].cdev, MKDEV(MY_MAJOR, i), 1); + } + + return 0; +} + +void cleanup_module(void) { + int i; + + printk( KERN_DEBUG "[cleanup_module]\n" ); + for(i = 0; i < MY_MAX_MINORS; i++) { + /* release devs[i] fields */ + cdev_del(&devs[i].cdev); + } + unregister_chrdev_region(MKDEV(MY_MAJOR, 0), MY_MAX_MINORS); +} + diff --git a/tools/labs/templates/device_drivers/include/so2_cdev.h b/tools/labs/templates/device_drivers/include/so2_cdev.h new file mode 100644 index 00000000000000..a88a8809dc5ab0 --- /dev/null +++ b/tools/labs/templates/device_drivers/include/so2_cdev.h @@ -0,0 +1,14 @@ +#ifndef __PSOCDEV_H__ +#define __PSOCDEV_H__ 1 + +#include + +#define BUFFER_SIZE 256 + +#define MY_IOCTL_PRINT _IOC(_IOC_NONE, 'k', 1, 0) +#define MY_IOCTL_SET_BUFFER _IOC(_IOC_WRITE, 'k', 2, BUFFER_SIZE) +#define MY_IOCTL_GET_BUFFER _IOC(_IOC_READ, 'k', 3, BUFFER_SIZE) +#define MY_IOCTL_DOWN _IOC(_IOC_NONE, 'k', 4, 0) +#define MY_IOCTL_UP _IOC(_IOC_NONE, 'k', 5, 0) + +#endif 
/*
 * Per-minor state of the so2_cdev character device.
 *
 *   cdev   - character device object; so2_cdev_open() recovers the
 *            enclosing structure from inode->i_cdev via container_of().
 *   buffer - device data, BUFSIZ bytes, read and written by the
 *            read/write/ioctl handlers.
 *   size   - number of valid bytes currently held in buffer.
 *   wq     - wait queue used by MY_IOCTL_DOWN/MY_IOCTL_UP and, when
 *            EXTRA is defined, by readers waiting for data.
 *   flag   - condition tested by MY_IOCTL_DOWN; set by MY_IOCTL_UP.
 *   access - 0 when the device is free, 1 while it is open; switched
 *            with atomic_cmpxchg() in so2_cdev_open().
 */
struct so2_device_data {
	/* TODO 2/1: add cdev member */
	struct cdev cdev;
	/* TODO 4/2: add buffer with BUFSIZ elements */
	char buffer[BUFSIZ];
	size_t size;
	/* TODO 7/2: extra members for home */
	wait_queue_head_t wq;
	int flag;
	/* TODO 3/1: add atomic_t access variable to keep track if file is opened */
	atomic_t access;
};
*/ + printk(LOG_LEVEL "open called!\n"); + + /* TODO 3/1: inode->i_cdev contains our cdev struct, use container_of to obtain a pointer to so2_device_data */ + data = container_of(inode->i_cdev, struct so2_device_data, cdev); + + file->private_data = data; + + /* TODO 3/2: return immediately if access is != 0, use atomic_cmpxchg */ + if (atomic_cmpxchg(&data->access, 0, 1) != 0) + return -EBUSY; + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(10); + + return 0; +} + +static int +so2_cdev_release(struct inode *inode, struct file *file) +{ + /* TODO 2/1: print message when the device file is closed. */ + printk(LOG_LEVEL "close called!\n"); + +#ifndef EXTRA + struct so2_device_data *data = + (struct so2_device_data *) file->private_data; + + /* TODO 3/1: reset access variable to 0, use atomic_set */ + atomic_set(&data->access, 0); +#endif + return 0; +} + +static ssize_t +so2_cdev_read(struct file *file, + char __user *user_buffer, + size_t size, loff_t *offset) +{ + struct so2_device_data *data = + (struct so2_device_data *) file->private_data; + size_t to_read; + +#ifdef EXTRA + /* TODO 7/6: extra tasks for home */ + if (!data->size) { + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + if (wait_event_interruptible(data->wq, data->size != 0)) + return -ERESTARTSYS; + } +#endif + + /* TODO 4/4: Copy data->buffer to user_buffer, use copy_to_user */ + to_read = (size > data->size - *offset) ? (data->size - *offset) : size; + if (copy_to_user(user_buffer, data->buffer + *offset, to_read) != 0) + return -EFAULT; + *offset += to_read; + + return to_read; +} + +static ssize_t +so2_cdev_write(struct file *file, + const char __user *user_buffer, + size_t size, loff_t *offset) +{ + struct so2_device_data *data = + (struct so2_device_data *) file->private_data; + + + /* TODO 5/5: copy user_buffer to data->buffer, use copy_from_user */ + size = (*offset + size > BUFSIZ) ? 
(BUFSIZ - *offset) : size; + if (copy_from_user(data->buffer + *offset, user_buffer, size) != 0) + return -EFAULT; + *offset += size; + data->size = *offset; + /* TODO 7/3: extra tasks for home */ +#ifdef EXTRA + wake_up_interruptible(&data->wq); +#endif + + return size; +} + +static long +so2_cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct so2_device_data *data = + (struct so2_device_data *) file->private_data; + int ret = 0; + int remains; + + switch (cmd) { + /* TODO 6/3: if cmd = MY_IOCTL_PRINT, display IOCTL_MESSAGE */ + case MY_IOCTL_PRINT: + printk(LOG_LEVEL "%s\n", IOCTL_MESSAGE); + break; + /* TODO 7/19: extra tasks, for home */ + case MY_IOCTL_DOWN: + data->flag = 0; + ret = wait_event_interruptible(data->wq, data->flag != 0); + break; + case MY_IOCTL_UP: + data->flag = 1; + wake_up_interruptible(&data->wq); + break; + case MY_IOCTL_SET_BUFFER: + remains = copy_from_user(data->buffer, (char __user *)arg, + BUFFER_SIZE); + if (remains) + ret = -EFAULT; + data->size = BUFFER_SIZE - remains; + break; + case MY_IOCTL_GET_BUFFER: + if (copy_to_user((char __user *)arg, data->buffer, data->size)) + ret = -EFAULT; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static const struct file_operations so2_fops = { + .owner = THIS_MODULE, +/* TODO 2/2: add open and release functions */ + .open = so2_cdev_open, + .release = so2_cdev_release, +/* TODO 4/1: add read function */ + .read = so2_cdev_read, +/* TODO 5/1: add write function */ + .write = so2_cdev_write, +/* TODO 6/1: add ioctl function */ + .unlocked_ioctl = so2_cdev_ioctl, +}; + +static int so2_cdev_init(void) +{ + int err; + int i; + + /* TODO 1/6: register char device region for MY_MAJOR and NUM_MINORS starting at MY_MINOR */ + err = register_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), + NUM_MINORS, MODULE_NAME); + if (err != 0) { + pr_info("register_chrdev_region"); + return err; + } + + for (i = 0; i < NUM_MINORS; i++) { +#ifdef EXTRA + /* TODO 7/2: extra tasks, 
for home */ + devs[i].size = 0; + memset(devs[i].buffer, 0, sizeof(devs[i].buffer)); +#else + /*TODO 4/2: initialize buffer with MESSAGE string */ + memcpy(devs[i].buffer, MESSAGE, sizeof(MESSAGE)); + devs[i].size = sizeof(MESSAGE); +#endif + /* TODO 7/2: extra tasks for home */ + init_waitqueue_head(&devs[i].wq); + devs[i].flag = 0; + /* TODO 3/1: set access variable to 0, use atomic_set */ + atomic_set(&devs[i].access, 0); + /* TODO 2/2: init and add cdev to kernel core */ + cdev_init(&devs[i].cdev, &so2_fops); + cdev_add(&devs[i].cdev, MKDEV(MY_MAJOR, i), 1); + } + + return 0; +} + +static void so2_cdev_exit(void) +{ + int i; + + for (i = 0; i < NUM_MINORS; i++) { + /* TODO 2/1: delete cdev from kernel core */ + cdev_del(&devs[i].cdev); + } + + /* TODO 1/1: unregister char device region, for MY_MAJOR and NUM_MINORS starting at MY_MINOR */ + unregister_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), NUM_MINORS); +} + +module_init(so2_cdev_init); +module_exit(so2_cdev_exit); diff --git a/tools/labs/templates/device_drivers/user/Makefile b/tools/labs/templates/device_drivers/user/Makefile new file mode 100644 index 00000000000000..8b4ace8e72fe9f --- /dev/null +++ b/tools/labs/templates/device_drivers/user/Makefile @@ -0,0 +1,7 @@ +all: so2_cdev_test.c + gcc -m32 -static -o so2_cdev_test so2_cdev_test.c + +.PHONY: clean + +clean: + -rm -f *~ *.o so2_cdev_test diff --git a/tools/labs/templates/device_drivers/user/so2_cdev_test b/tools/labs/templates/device_drivers/user/so2_cdev_test new file mode 100755 index 00000000000000..e8e7a2e86967b3 Binary files /dev/null and b/tools/labs/templates/device_drivers/user/so2_cdev_test differ diff --git a/tools/labs/templates/device_drivers/user/so2_cdev_test.c b/tools/labs/templates/device_drivers/user/so2_cdev_test.c new file mode 100644 index 00000000000000..24a1f39d6898ea --- /dev/null +++ b/tools/labs/templates/device_drivers/user/so2_cdev_test.c @@ -0,0 +1,125 @@ +/* + * SO2 Lab - Linux device drivers (#4) + * User-space test file 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include "../include/so2_cdev.h" + +#define DEVICE_PATH "/dev/so2_cdev" + +/* + * prints error message and exits + */ + +static void error(const char *message) +{ + perror(message); + exit(EXIT_FAILURE); +} + +/* + * print use case + */ + +static void usage(const char *argv0) +{ + printf("Usage: %s \n options:\n" + "\tp - print\n" + "\ts string - set buffer\n" + "\tg - get buffer\n" + "\td - down\n" + "\tu - up\n" + "\tn - open with O_NONBLOCK and read data\n", argv0); + exit(EXIT_FAILURE); +} + +/* + * Sample run: + * ./so2_cdev_test p ; print ioctl message + * ./so2_cdev_test d ; wait on wait_queue + * ./so2_cdev_test u ; wait on wait_queue + */ + +int main(int argc, char **argv) +{ + int fd; + char buffer[BUFFER_SIZE]; + + if (argc < 2) + usage(argv[0]); + + if (strlen(argv[1]) != 1) + usage(argv[0]); + + fd = open(DEVICE_PATH, O_RDONLY); + if (fd < 0) { + perror("open"); + exit(EXIT_FAILURE); + } + + switch (argv[1][0]) { + case 'p': /* print */ + if (ioctl(fd, MY_IOCTL_PRINT, 0) < 0) { + perror("ioctl"); + exit(EXIT_FAILURE); + } + + break; + case 's': /* set buffer */ + if (argc < 3) + usage(argv[0]); + memset(buffer, 0, BUFFER_SIZE); + strncpy(buffer, argv[2], BUFFER_SIZE); + if (ioctl(fd, MY_IOCTL_SET_BUFFER, buffer) < 0) { + perror("ioctl"); + exit(EXIT_FAILURE); + } + break; + case 'g': /* get buffer */ + if (ioctl(fd, MY_IOCTL_GET_BUFFER, buffer) < 0) { + perror("ioctl"); + exit(EXIT_FAILURE); + } + buffer[BUFFER_SIZE-1] = 0; + printf("IOCTL buffer contains %s\n", buffer); + break; + case 'd': /* down */ + if (ioctl(fd, MY_IOCTL_DOWN, 0) < 0) { + perror("ioctl"); + exit(EXIT_FAILURE); + } + break; + case 'u': /* up */ + if (ioctl(fd, MY_IOCTL_UP, 0) < 0) { + perror("ioctl"); + exit(EXIT_FAILURE); + } + break; + case 'n': + if (fcntl(fd, F_SETFL, O_RDONLY | O_NONBLOCK) < 0) { + perror("fcntl"); + exit(EXIT_FAILURE); + } + + if (read(fd, buffer, BUFFER_SIZE) < 0) { + 
perror("read"); + exit(EXIT_FAILURE); + } + buffer[BUFFER_SIZE-1] = 0; + printf("Device buffer contains %s\n", buffer); + break; + default: + error("Wrong parameter"); + } + + close(fd); + + return 0; +} diff --git a/tools/labs/templates/device_model/Kbuild b/tools/labs/templates/device_model/Kbuild new file mode 100644 index 00000000000000..984571ae0c7f53 --- /dev/null +++ b/tools/labs/templates/device_model/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = bex.o bex_misc.o diff --git a/tools/labs/templates/device_model/bex.c b/tools/labs/templates/device_model/bex.c new file mode 100644 index 00000000000000..4419b2389bf7a7 --- /dev/null +++ b/tools/labs/templates/device_model/bex.c @@ -0,0 +1,220 @@ +#include +#include +#include +#include +#include +#include + +#include "bex.h" + +MODULE_AUTHOR ("Kernel Hacker"); +MODULE_LICENSE ("GPL"); +MODULE_DESCRIPTION ("BEX bus module"); + +static int bex_match(struct device *dev, struct device_driver *driver) +{ + /* TODO 5/5: implement the bus match function */ + struct bex_device *bex_dev = to_bex_device(dev); + struct bex_driver *bex_drv = to_bex_driver(driver); + + if (!strcmp(bex_dev->type, bex_drv->type)) + return 1; + + return 0; +} + +static int bex_probe(struct device *dev) +{ + struct bex_device *bex_dev = to_bex_device(dev); + struct bex_driver *bex_drv = to_bex_driver(dev->driver); + + return bex_drv->probe(bex_dev); +} + +static int bex_remove(struct device *dev) +{ + struct bex_device *bex_dev = to_bex_device(dev); + struct bex_driver *bex_drv = to_bex_driver(dev->driver); + + bex_drv->remove(bex_dev); + return 0; +} + +static int bex_add_dev(const char *name, const char *type, int version); + +/* TODO 3/14: implement write only add attribute */ +static ssize_t +add_store(struct bus_type *bt, const char *buf, size_t count) +{ + char type[32], name[32]; + int version; + int ret; + + ret = sscanf(buf, "%31s %31s %d", name, type, &version); + if (ret != 3) 
+ return -EINVAL; + + return bex_add_dev(name, type, version) ? : count; +} +BUS_ATTR(add, S_IWUSR, NULL, add_store); + +static int bex_del_dev(const char *name); + +/* TODO 3/13: implement write only del attribute */ +static ssize_t +del_store(struct bus_type *bt, const char *buf, size_t count) +{ + char name[32]; + int version; + + if (sscanf(buf, "%s", name) != 1) + return -EINVAL; + + return bex_del_dev(name) ? 0 : count; + +} +BUS_ATTR(del, S_IWUSR, NULL, del_store); + +static struct attribute *bex_bus_attrs[] = { + /* TODO 3/2: add del and add attributes */ + &bus_attr_add.attr, + &bus_attr_del.attr, + NULL +}; +ATTRIBUTE_GROUPS(bex_bus); + +struct bus_type bex_bus_type = { + .name = "bex", + .match = bex_match, + .probe = bex_probe, + .remove = bex_remove, + /* TODO 3: add bus groups attributes */ + .bus_groups = bex_bus_groups, +}; + +/*TODO 2/8: add read-only device attribute to show the type */ +static ssize_t +type_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct bex_device *bex_dev = to_bex_device(dev); + + return sprintf(buf, "%s\n", bex_dev->type); +} +DEVICE_ATTR_RO(type); + +/*TODO 2/8: add read-only device attribute to show the version */ +static ssize_t +version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct bex_device *bex_dev = to_bex_device(dev); + + return sprintf(buf, "%d\n", bex_dev->version); +} +DEVICE_ATTR_RO(version); + +static struct attribute *bex_dev_attrs[] = { + /* TODO 2/2: add type and version attributes */ + &dev_attr_type.attr, + &dev_attr_version.attr, + NULL +}; +ATTRIBUTE_GROUPS(bex_dev); + +static int bex_dev_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return add_uevent_var(env, "MODALIAS=bex:%s", dev_name(dev)); +} + +static void bex_dev_release(struct device *dev) +{ + struct bex_device *bex_dev = to_bex_device(dev); + + kfree(bex_dev->type); + kfree(bex_dev); +} + +struct device_type bex_device_type = { + /* TODO 2: set the device groups 
attributes */ + .groups = bex_dev_groups, + .uevent = bex_dev_uevent, + .release = bex_dev_release, +}; + +static int bex_add_dev(const char *name, const char *type, int version) +{ + struct bex_device *bex_dev; + + bex_dev = kzalloc(sizeof(*bex_dev), GFP_KERNEL); + if (!bex_dev) + return -ENOMEM; + + bex_dev->type = kstrdup(type, GFP_KERNEL); + bex_dev->version = version; + + bex_dev->dev.bus = &bex_bus_type; + bex_dev->dev.type = &bex_device_type; + bex_dev->dev.parent = NULL; + + dev_set_name(&bex_dev->dev, "%s", name); + + return device_register(&bex_dev->dev); +} + +static int bex_del_dev(const char *name) +{ + struct device *dev; + + dev = bus_find_device_by_name(&bex_bus_type, NULL, name); + if (!dev) + return -EINVAL; + + device_unregister(dev); + put_device(dev); + + return 0; +} + +int bex_register_driver(struct bex_driver *drv) +{ + int ret; + + drv->driver.bus = &bex_bus_type; + ret = driver_register(&drv->driver); + if (ret) + return ret; + + return 0; +} +EXPORT_SYMBOL(bex_register_driver); + +void bex_unregister_driver(struct bex_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL(bex_unregister_driver); + +static int __init my_bus_init (void) +{ + int ret; + + /* TODO 1/5: register the bus driver */ + ret = bus_register(&bex_bus_type); + if (ret < 0) { + pr_err("Unable to register bus\n"); + return ret; + } + + /* TODO 1: add a device */ + bex_add_dev("root", "none", 1); + + return 0; +} + +static void my_bus_exit (void) +{ + /* TODO 1: unregister the bus driver */ + bus_unregister(&bex_bus_type); +} + +module_init (my_bus_init); +module_exit (my_bus_exit); diff --git a/tools/labs/templates/device_model/bex.h b/tools/labs/templates/device_model/bex.h new file mode 100644 index 00000000000000..ba914bead634b6 --- /dev/null +++ b/tools/labs/templates/device_model/bex.h @@ -0,0 +1,28 @@ +#ifndef _BEX_H +#define _BEX_H + +#include + +struct bex_device { + const char *type; + int version; + struct device dev; +}; + +#define 
to_bex_device(drv) container_of(dev, struct bex_device, dev) + +struct bex_driver { + const char *type; + + int (*probe)(struct bex_device *dev); + void (*remove)(struct bex_device *dev); + + struct device_driver driver; +}; + +#define to_bex_driver(drv) container_of(drv, struct bex_driver, driver) + +int bex_register_driver(struct bex_driver *drv); +void bex_unregister_driver(struct bex_driver *drv); + +#endif diff --git a/tools/labs/templates/device_model/bex_misc.c b/tools/labs/templates/device_model/bex_misc.c new file mode 100644 index 00000000000000..0c224ffe17b0d8 --- /dev/null +++ b/tools/labs/templates/device_model/bex_misc.c @@ -0,0 +1,155 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "bex.h" + +MODULE_DESCRIPTION("BEX misc driver"); +MODULE_AUTHOR("Kernel Hacker"); +MODULE_LICENSE("GPL"); + +#define BUF_SIZE 1024 + +struct bex_misc_device { + struct miscdevice misc; + struct bex_device *dev; + char buf[BUF_SIZE]; +}; + +#define to_bex_misc_dev(dev) container_of(dev, struct bex_misc_device, dev) + +static int my_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int my_release(struct inode *inode, struct file *file) +{ + return 0; +} + +static int my_read(struct file *file, char __user *user_buffer, + size_t size, loff_t *offset) +{ + struct bex_misc_device *bmd = (struct bex_misc_device *)file->private_data; + ssize_t len = min(sizeof(bmd->buf) - (ssize_t)*offset, size); + + if (len <= 0) + return 0; + + if (copy_to_user(user_buffer, bmd->buf + *offset, len)) + return -EFAULT; + + *offset += len; + return len; +} + +static int my_write(struct file *file, const char __user *user_buffer, + size_t size, loff_t *offset) +{ + struct bex_misc_device *bmd = (struct bex_misc_device *)file->private_data; + ssize_t len = min(sizeof(bmd->buf) - (ssize_t)*offset, size); + + if (len <= 0) + return 0; + + if (copy_from_user(bmd->buf + *offset, user_buffer, len)) + return -EFAULT; + + *offset += len; 
+ return len; +} + +struct file_operations bex_misc_fops = { + .owner = THIS_MODULE, + .open = my_open, + .read = my_read, + .write = my_write, + .release = my_release, +}; + +static int bex_misc_count; + +int bex_misc_probe(struct bex_device *dev) +{ + struct bex_misc_device *bmd; + char buf[32]; + int ret; + + dev_info(&dev->dev, "%s: %s %d\n", __func__, dev->type, dev->version); + + /* TODO 6/4: refuse the probe is version > 1 */ + if (dev->version > 1) { + dev_info(&dev->dev, "unknown version: %d\n", dev->version); + return -ENODEV; + } + + bmd = kzalloc(sizeof(struct miscdevice), GFP_KERNEL); + if (!bmd) + return -ENOMEM; + + bmd->misc.minor = MISC_DYNAMIC_MINOR; + snprintf(buf, sizeof(buf), "bex-misc-%d", bex_misc_count++); + bmd->misc.name = kstrdup(buf, GFP_KERNEL); + bmd->misc.parent = &dev->dev; + bmd->misc.fops = &bex_misc_fops; + bmd->dev = dev; + dev_set_drvdata(&dev->dev, bmd); + + /* TODO 6/5: register the misc device */ + ret = misc_register(&bmd->misc); + if (ret) { + dev_err(&dev->dev, "failed to register misc device: %d\n", ret); + return ret; + } + + return 0; +} + +void bex_misc_remove(struct bex_device *dev) +{ + struct bex_misc_device *bmd; + + bmd = (struct bex_misc_device *)dev_get_drvdata(&dev->dev); + + /* TODO 6: deregister the misc device */ + misc_deregister(&bmd->misc); + kfree(bmd); +} + +struct bex_driver bex_misc_driver = { + .type = "misc", + .probe = bex_misc_probe, + .remove = bex_misc_remove, + .driver = { + .owner = THIS_MODULE, + .name = "bex_misc", + }, +}; + +static int my_init(void) +{ + int err; + + /* TODO 4/5: register the driver */ + err = bex_register_driver(&bex_misc_driver); + if(err) { + pr_err("unable to register driver: %d\n", err); + return err; + } + + return 0; +} + +static void my_exit(void) +{ + /* TODO 4: unregister the driver */ + bex_unregister_driver(&bex_misc_driver); +} + +module_init(my_init); +module_exit(my_exit); diff --git a/tools/labs/templates/device_model/makenode 
/*
 * In-memory superblock data, allocated in minfs_fill_super() and
 * reachable through sb->s_fs_info.
 *
 *   version - copied from the on-disk superblock (ms->version).
 *   imap    - inode bitmap copied from the on-disk superblock
 *             (ms->imap); minfs_new_inode() searches it for a free
 *             inode index and sets the chosen bit.
 *   sbh     - buffer head of the superblock block; kept after mount
 *             and released in minfs_put_super().
 */
struct minfs_sb_info {
	__u8 version;
	unsigned long imap;
	struct buffer_head *sbh;
};
/*
 * Address space operations installed on every inode by minfs_iget()
 * (inode->i_mapping->a_ops). All three hooks are the generic simple_*
 * helpers; minfs provides no page I/O routines of its own here.
 */
static const struct address_space_operations minfs_aops = {
	.readpage = simple_readpage,
	.write_begin = simple_write_begin,
	.write_end = simple_write_end,
};
*/ + inode->i_op = &minfs_dir_inode_operations; + inode->i_fop = &minfs_dir_operations; + + /* TODO 4/1: Directory inodes start off with i_nlink == 2. + * (use inc_link) */ + inc_nlink(inode); + } + + /* TODO 7/4: Fill inode and file operations for regular files + * (i_op and i_fop). Use the S_ISREG macro. + */ + if (S_ISREG(inode->i_mode)) { + inode->i_op = &minfs_file_inode_operations; + inode->i_fop = &minfs_file_operations; + } + + /* fill data for mii */ + mii = container_of(inode, struct minfs_inode_info, vfs_inode); + + /* TODO 4/1: uncomment after the minfs_inode is initialized */ + mii->data_block = mi->data_block; + //mii->data_block = mi->data_block; + + /* Free resources. */ + /* TODO 4/1: uncomment after the buffer_head is initialized */ + brelse(bh); + //brelse(bh); + unlock_new_inode(inode); + + return inode; + +out_bad_sb: + iget_failed(inode); + return NULL; +} + +static int minfs_readdir(struct file *filp, struct dir_context *ctx) +{ + struct buffer_head *bh; + struct minfs_dir_entry *de; + struct minfs_inode_info *mii; + struct inode *inode; + struct super_block *sb; + int over; + int err = 0; + + /* TODO 5/2: Get inode of directory and container inode. */ + inode = file_inode(filp); + mii = container_of(inode, struct minfs_inode_info, vfs_inode); + + /* TODO 5/1: Get superblock from inode (i_sb). */ + sb = inode->i_sb; + + /* TODO 5/6: Read data block for directory inode. */ + bh = sb_bread(sb, mii->data_block); + if (bh == NULL) { + printk(LOG_LEVEL "could not read block\n"); + err = -ENOMEM; + goto out_bad_sb; + } + + for (; ctx->pos < MINFS_NUM_ENTRIES; ctx->pos++) { + /* TODO 5/1: Data block contains an array of + * "struct minfs_dir_entry". Use `de' for storing. + */ + de = (struct minfs_dir_entry *) bh->b_data + ctx->pos; + + /* TODO 5/3: Step over empty entries (de->ino == 0). */ + if (de->ino == 0) { + continue; + } + + /* + * Use `over` to store return value of dir_emit and exit + * if required. 
+ */ + over = dir_emit(ctx, de->name, MINFS_NAME_LEN, de->ino, + DT_UNKNOWN); + if (over) { + printk(KERN_DEBUG "Read %s from folder %s, ctx->pos: %lld\n", + de->name, + filp->f_path.dentry->d_name.name, + ctx->pos); + ctx->pos++; + goto done; + } + } + +done: + brelse(bh); +out_bad_sb: + return err; +} + +/* + * Find dentry in parent folder. Return parent folder's data buffer_head. + */ + +static struct minfs_dir_entry *minfs_find_entry(struct dentry *dentry, + struct buffer_head **bhp) +{ + struct buffer_head *bh; + struct inode *dir = dentry->d_parent->d_inode; + struct minfs_inode_info *mii = container_of(dir, + struct minfs_inode_info, vfs_inode); + struct super_block *sb = dir->i_sb; + const char *name = dentry->d_name.name; + struct minfs_dir_entry *final_de = NULL; + struct minfs_dir_entry *de; + int i; + + /* TODO 6/6: Read parent folder data block (contains dentries). + * Fill bhp with return value. + */ + bh = sb_bread(sb, mii->data_block); + if (bh == NULL) { + printk(LOG_LEVEL "could not read block\n"); + return NULL; + } + *bhp = bh; + + for (i = 0; i < MINFS_NUM_ENTRIES; i++) { + /* TODO 6/10: Traverse all entries, find entry by name + * Use `de' to traverse. Use `final_de' to store dentry + * found, if existing. + */ + de = ((struct minfs_dir_entry *) bh->b_data) + i; + if (de->ino != 0) { + /* found it */ + if (strcmp(name, de->name) == 0) { + printk(KERN_DEBUG "Found entry %s on position: %zd\n", + name, i); + final_de = de; + break; + } + } + } + + /* bh needs to be released by caller. */ + return final_de; +} + +static struct dentry *minfs_lookup(struct inode *dir, + struct dentry *dentry, unsigned int flags) +{ + /* TODO 6/1: Comment line. 
*/ + // \ + return simple_lookup(dir, dentry, flags); + + struct super_block *sb = dir->i_sb; + struct minfs_dir_entry *de; + struct buffer_head *bh = NULL; + struct inode *inode = NULL; + + dentry->d_op = sb->s_root->d_op; + + de = minfs_find_entry(dentry, &bh); + if (de != NULL) { + printk(KERN_DEBUG "getting entry: name: %s, ino: %d\n", + de->name, de->ino); + inode = minfs_iget(sb, de->ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + } + + d_add(dentry, inode); + brelse(bh); + + printk(KERN_DEBUG "looked up dentry %s\n", dentry->d_name.name); + + return NULL; +} + +static struct inode *minfs_alloc_inode(struct super_block *s) +{ + struct minfs_inode_info *mii; + + /* TODO 3/4: Allocate minfs_inode_info. */ + mii = kzalloc(sizeof(struct minfs_inode_info), GFP_KERNEL); + if (mii == NULL) + return NULL; + + /* TODO 3/1: init VFS inode in minfs_inode_info */ + inode_init_once(&mii->vfs_inode); + + return &mii->vfs_inode; +} + +static void minfs_destroy_inode(struct inode *inode) +{ + /* TODO 3/1: free minfs_inode_info */ + kfree(container_of(inode, struct minfs_inode_info, vfs_inode)); +} + +/* + * Create a new VFS inode. Do basic initialization and fill imap. + */ + +static struct inode *minfs_new_inode(struct inode *dir) +{ + struct super_block *sb = dir->i_sb; + struct minfs_sb_info *sbi = sb->s_fs_info; + struct inode *inode; + int idx; + + /* TODO 7/5: Find first available inode. */ + idx = find_first_zero_bit(&sbi->imap, MINFS_NUM_INODES); + if (idx < 0) { + printk(LOG_LEVEL "no space left in imap\n"); + return NULL; + } + + /* TODO 7/2: Mark the inode as used in the bitmap and mark + * the superblock buffer head as dirty. + */ + __test_and_set_bit(idx, &sbi->imap); + mark_buffer_dirty(sbi->sbh); + + /* TODO 7/8: Call new_inode(), fill inode fields + * and insert inode into inode hash table. 
+ */ + inode = new_inode(sb); + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_ino = idx; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_blocks = 0; + + insert_inode_hash(inode); + + /* Actual writing to the disk will be done in minfs_write_inode, + * which will be called at a later time. + */ + + return inode; +} + +/* + * Add dentry link on parent inode disk structure. + */ + +static int minfs_add_link(struct dentry *dentry, struct inode *inode) +{ + struct buffer_head *bh; + struct inode *dir; + struct super_block *sb; + struct minfs_inode_info *mii; + struct minfs_dir_entry *de; + int i; + int err = 0; + + /* TODO 7/3: Get: directory inode (in inode); containing inode (in mii); superblock (in sb). */ + dir = dentry->d_parent->d_inode; + mii = container_of(dir, struct minfs_inode_info, vfs_inode); + sb = dir->i_sb; + + /* TODO 7/1: Read dir data block (use sb_bread). */ + bh = sb_bread(sb, mii->data_block); + + /* TODO 7/10: Find first free dentry (de->ino == 0). */ + for (i = 0; i < MINFS_NUM_ENTRIES; i++) { + de = (struct minfs_dir_entry *) bh->b_data + i; + if (de->ino == 0) + break; + } + + if (i == MINFS_NUM_ENTRIES) { + err = -ENOSPC; + goto out; + } + + /* TODO 7/5: Place new entry in the available slot. Mark buffer_head + * as dirty. */ + de->ino = inode->i_ino; + memcpy(de->name, dentry->d_name.name, MINFS_NAME_LEN); + dir->i_mtime = dir->i_ctime = current_time(inode); + + mark_buffer_dirty(bh); + +out: + brelse(bh); + + return err; +} + +/* + * Create a VFS file inode. Use minfs_file_... operations. 
+ */ + +static int minfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) +{ + struct inode *inode; + struct minfs_inode_info *mii; + int err; + + inode = minfs_new_inode(dir); + if (inode == NULL) { + printk(LOG_LEVEL "error allocating new inode\n"); + err = -ENOMEM; + goto err_new_inode; + } + + inode->i_mode = mode; + inode->i_op = &minfs_file_inode_operations; + inode->i_fop = &minfs_file_operations; + mii = container_of(inode, struct minfs_inode_info, vfs_inode); + mii->data_block = MINFS_FIRST_DATA_BLOCK + inode->i_ino; + + err = minfs_add_link(dentry, inode); + if (err != 0) + goto err_add_link; + + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + + printk(KERN_DEBUG "new file inode created (ino = %lu)\n", + inode->i_ino); + + return 0; + +err_add_link: + inode_dec_link_count(inode); + iput(inode); +err_new_inode: + return err; +} + +/* + * Write VFS inode contents to disk inode. + */ + +static int minfs_write_inode(struct inode *inode, + struct writeback_control *wbc) +{ + struct super_block *sb = inode->i_sb; + struct minfs_inode *mi; + struct minfs_inode_info *mii = container_of(inode, + struct minfs_inode_info, vfs_inode); + struct buffer_head *bh; + int err = 0; + + bh = sb_bread(sb, MINFS_INODE_BLOCK); + if (bh == NULL) { + printk(LOG_LEVEL "could not read block\n"); + err = -ENOMEM; + goto out; + } + + mi = (struct minfs_inode *) bh->b_data + inode->i_ino; + + /* fill disk inode */ + mi->mode = inode->i_mode; + mi->uid = i_uid_read(inode); + mi->gid = i_gid_read(inode); + mi->size = inode->i_size; + mi->data_block = mii->data_block; + + printk(KERN_DEBUG "mode is %05o; data_block is %d\n", mi->mode, + mii->data_block); + + mark_buffer_dirty(bh); + brelse(bh); + + printk(KERN_DEBUG "wrote inode %lu\n", inode->i_ino); + +out: + return err; +} + +static void minfs_put_super(struct super_block *sb) +{ + struct minfs_sb_info *sbi = sb->s_fs_info; + + /* Free superblock buffer head. 
*/ + mark_buffer_dirty(sbi->sbh); + brelse(sbi->sbh); + + printk(KERN_DEBUG "released superblock resources\n"); +} + +static const struct super_operations minfs_ops = { + .statfs = simple_statfs, + .put_super = minfs_put_super, + /* TODO 4/2: add alloc and destroy inode functions */ + .alloc_inode = minfs_alloc_inode, + .destroy_inode = minfs_destroy_inode, + /* TODO 7/1: = set write_inode function. */ + .write_inode = minfs_write_inode, +}; + +static int minfs_fill_super(struct super_block *s, void *data, int silent) +{ + struct minfs_sb_info *sbi; + struct minfs_super_block *ms; + struct inode *root_inode; + struct dentry *root_dentry; + struct buffer_head *bh; + int ret = -EINVAL; + + sbi = kzalloc(sizeof(struct minfs_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + s->s_fs_info = sbi; + + /* Set block size for superblock. */ + if (!sb_set_blocksize(s, MINFS_BLOCK_SIZE)) + goto out_bad_blocksize; + + /* TODO 2/3: Read block with superblock. It's the first block on + * the device, i.e. the block with the index 0. This is the index + * to be passed to sb_bread(). + */ + bh = sb_bread(s, MINFS_SUPER_BLOCK); + if (bh == NULL) + goto out_bad_sb; + + /* TODO 2/1: interpret read data as minfs_super_block */ + ms = (struct minfs_super_block *) bh->b_data; + + /* TODO 2/2: check magic number with value defined in minfs.h. jump to out_bad_magic if not suitable */ + if (ms->magic != MINFS_MAGIC) + goto out_bad_magic; + + /* TODO 2/2: fill super_block with magic_number, super_operations */ + s->s_magic = MINFS_MAGIC; + s->s_op = &minfs_ops; + + /* TODO 2/2: Fill sbi with rest of information from disk superblock + * (i.e. version). 
+ */ + sbi->version = ms->version; + sbi->imap = ms->imap; + + /* allocate root inode and root dentry */ + /* TODO 2/0: use myfs_get_inode instead of minfs_iget */ + root_inode = minfs_iget(s, MINFS_ROOT_INODE); + if (!root_inode) + goto out_bad_inode; + + root_dentry = d_make_root(root_inode); + if (!root_dentry) + goto out_iput; + s->s_root = root_dentry; + + /* Store superblock buffer_head for further use. */ + sbi->sbh = bh; + + return 0; + +out_iput: + iput(root_inode); +out_bad_inode: + printk(LOG_LEVEL "bad inode\n"); +out_bad_magic: + printk(LOG_LEVEL "bad magic number\n"); + brelse(bh); +out_bad_sb: + printk(LOG_LEVEL "error reading buffer_head\n"); +out_bad_blocksize: + printk(LOG_LEVEL "bad block size\n"); + s->s_fs_info = NULL; + kfree(sbi); + return ret; +} + +static struct dentry *minfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + /* TODO 1/1: call superblock mount function */ + return mount_bdev(fs_type, flags, dev_name, data, minfs_fill_super); +} + +static struct file_system_type minfs_fs_type = { + .owner = THIS_MODULE, + .name = "minfs", + /* TODO 1/3: add mount, kill_sb and fs_flags */ + .mount = minfs_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init minfs_init(void) +{ + int err; + + err = register_filesystem(&minfs_fs_type); + if (err) { + printk(LOG_LEVEL "register_filesystem failed\n"); + return err; + } + + return 0; +} + +static void __exit minfs_exit(void) +{ + unregister_filesystem(&minfs_fs_type); +} + +module_init(minfs_init); +module_exit(minfs_exit); diff --git a/tools/labs/templates/filesystems/minfs/kernel/minfs.h b/tools/labs/templates/filesystems/minfs/kernel/minfs.h new file mode 100644 index 00000000000000..92285597a938ce --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/kernel/minfs.h @@ -0,0 +1,45 @@ +#ifndef _MINFS_H +#define _MINFS_H 1 + +#define MINFS_MAGIC 0xDEADF00D +#define MINFS_NAME_LEN 16 +#define MINFS_BLOCK_SIZE 
4096 +#define MINFS_NUM_INODES 32 +#define MINFS_NUM_ENTRIES 32 + +#define MINFS_ROOT_INODE 0 + +/* + * Filesystem layout: + * + * SB IZONE DATA + * ^ ^ (1 block) + * | | + * +-0 +-- 4096 + */ + +#define MINFS_SUPER_BLOCK 0 +#define MINFS_INODE_BLOCK 1 +#define MINFS_FIRST_DATA_BLOCK 2 + +struct minfs_super_block { + unsigned long magic; + __u8 version; + unsigned long imap; +}; + +struct minfs_dir_entry { + __u32 ino; + char name[MINFS_NAME_LEN]; +}; + +/* A minfs inode uses a single block. */ +struct minfs_inode { + __u32 mode; + __u32 uid; + __u32 gid; + __u32 size; + __u16 data_block; +}; + +#endif /* _MINFS_H */ diff --git a/tools/labs/templates/filesystems/minfs/user/.gitignore b/tools/labs/templates/filesystems/minfs/user/.gitignore new file mode 100644 index 00000000000000..970317ec7dd653 --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/.gitignore @@ -0,0 +1 @@ +/mkfs.minfs diff --git a/tools/labs/templates/filesystems/minfs/user/Makefile b/tools/labs/templates/filesystems/minfs/user/Makefile new file mode 100644 index 00000000000000..b8e754a6b70fff --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/Makefile @@ -0,0 +1,13 @@ +CFLAGS = -Wall -g -m32 +LDFLAGS = -static -m32 + +.PHONY: all clean + +all: mkfs.minfs + +mkfs.minfs: mkfs.minfs.o + +mkfs.minfs.o: mkfs.minfs.c ../kernel/minfs.h + +clean: + -rm -f *~ *.o mkfs.minfs diff --git a/tools/labs/templates/filesystems/minfs/user/mkfs.minfs.c b/tools/labs/templates/filesystems/minfs/user/mkfs.minfs.c new file mode 100644 index 00000000000000..c5e8f9132bb2c0 --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/mkfs.minfs.c @@ -0,0 +1,81 @@ +#include +#include +#include + +#include +#include + +#include "../kernel/minfs.h" + +/* + * mk_minfs file + */ + +int main(int argc, char **argv) +{ + FILE *file; + char buffer[MINFS_BLOCK_SIZE]; + struct minfs_super_block msb; + struct minfs_inode root_inode; + struct minfs_inode file_inode; + struct minfs_dir_entry file_dentry; + 
int i; + + if (argc != 2) { + fprintf(stderr, "Usage: %s block_device_name\n", argv[0]); + exit(EXIT_FAILURE); + } + + file = fopen(argv[1], "w+"); + if (file == NULL) { + perror("fopen"); + exit(EXIT_FAILURE); + } + + memset(&msb, 0, sizeof(struct minfs_super_block)); + + msb.magic = MINFS_MAGIC; + msb.version = 1; + msb.imap = 0x03; + + /* zero disk */ + memset(buffer, 0, MINFS_BLOCK_SIZE); + for (i = 0; i < 128; i++) + fwrite(buffer, 1, MINFS_BLOCK_SIZE, file); + + fseek(file, 0, SEEK_SET); + + /* initialize super block */ + fwrite(&msb, sizeof(msb), 1, file); + + /* initialize root inode */ + memset(&root_inode, 0, sizeof(root_inode)); + root_inode.uid = 0; + root_inode.gid = 0; + root_inode.mode = S_IFDIR | 0755; + root_inode.size = 0; + root_inode.data_block = MINFS_FIRST_DATA_BLOCK; + + fseek(file, MINFS_INODE_BLOCK * MINFS_BLOCK_SIZE, SEEK_SET); + fwrite(&root_inode, sizeof(root_inode), 1, file); + + /* initialize new inode */ + memset(&file_inode, 0, sizeof(file_inode)); + file_inode.uid = 0; + file_inode.gid = 0; + file_inode.mode = S_IFREG | 0644; + file_inode.size = 0; + file_inode.data_block = MINFS_FIRST_DATA_BLOCK + 1; + fwrite(&file_inode, sizeof(file_inode), 1, file); + + /* add dentry information */ + memset(&file_dentry, 0, sizeof(file_dentry)); + file_dentry.ino = 1; + memcpy(file_dentry.name, "a.txt", 5); + fseek(file, MINFS_FIRST_DATA_BLOCK * MINFS_BLOCK_SIZE, SEEK_SET); + fwrite(&file_dentry, sizeof(file_dentry), 1, file); + + fclose(file); + + return 0; +} diff --git a/tools/labs/templates/filesystems/minfs/user/test-minfs-0.sh b/tools/labs/templates/filesystems/minfs/user/test-minfs-0.sh new file mode 100755 index 00000000000000..7da9597bc9c2f4 --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/test-minfs-0.sh @@ -0,0 +1,25 @@ +#!/bin/sh + +# load module +insmod ../kernel/minfs.ko + +# create mount point +mkdir -p /mnt/minfs + +# format partition +./mkfs.minfs /dev/vdb + +# mount filesystem +mount -t minfs /dev/vdb /mnt/minfs + 
+# show registered filesystems +cat /proc/filesystems + +# show mounted filesystems +cat /proc/mounts + +# umount filesystem +umount /mnt/minfs + +# unload module +rmmod minfs diff --git a/tools/labs/templates/filesystems/minfs/user/test-minfs-1.sh b/tools/labs/templates/filesystems/minfs/user/test-minfs-1.sh new file mode 100755 index 00000000000000..0a824ae9251c27 --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/test-minfs-1.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +# load module +insmod ../kernel/minfs.ko + +# create mount point +mkdir -p /mnt/minfs + +# format partition +./mkfs.minfs /dev/vdb + +# mount filesystem +mount -t minfs /dev/vdb /mnt/minfs + +# list all filesystem files +cd /mnt/minfs +ls -la + +# unmount filesystem +cd .. +umount /mnt/minfs + +# unload module +rmmod minfs diff --git a/tools/labs/templates/filesystems/minfs/user/test-minfs-2.sh b/tools/labs/templates/filesystems/minfs/user/test-minfs-2.sh new file mode 100755 index 00000000000000..11970dfa0234cc --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/test-minfs-2.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +pushd . > /dev/null 2>&1 + +# load module +insmod ../kernel/minfs.ko + +# create mount point +mkdir -p /mnt/minfs + +# format partition +./mkfs.minfs /dev/vdb + +# mount filesystem +mount -t minfs /dev/vdb /mnt/minfs + +# change to minfs root folder +cd /mnt/minfs + +# create new file +touch b.txt && echo "OK. File created." || echo "NOT OK. File creation failed." + +# unmount filesystem +cd .. +umount /mnt/minfs + +popd > /dev/null 2>&1 + +# mount filesystem +mount -t minfs /dev/vdb /mnt/minfs + +# check whether b.txt is still there +ls /mnt/minfs | grep b.txt && echo "OK. File b.txt exists " || echo "NOT OK. File b.txt does not exist." 
+ +# unmount filesystem +umount /mnt/minfs + +# unload module +rmmod minfs diff --git a/tools/labs/templates/filesystems/minfs/user/test-minfs.sh b/tools/labs/templates/filesystems/minfs/user/test-minfs.sh new file mode 100755 index 00000000000000..ea704e07600c2e --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/test-minfs.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +set -ex + +#load module +insmod ../kernel/minfs.ko + +#create mount point +mkdir -p /mnt/minfs + +#format partition +./mkfs.minfs /dev/vdb + +#mount filesystem +mount -t minfs /dev/vdb /mnt/minfs + +#show registered filesystems +cat /proc/filesystems | grep minfs + +#show mounted filesystems +cat /proc/mounts | grep minfs + +#show filesystem statistics +stat -f /mnt/minfs + +#list all filesystem files +cd /mnt/minfs +ls -la + +#unmount filesystem +cd .. +umount /mnt/minfs + +#unload module +rmmod minfs diff --git a/tools/labs/templates/filesystems/myfs/Kbuild b/tools/labs/templates/filesystems/myfs/Kbuild new file mode 100644 index 00000000000000..1b3fa9316852ee --- /dev/null +++ b/tools/labs/templates/filesystems/myfs/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g -Wno-unused + +obj-m = myfs.o diff --git a/tools/labs/templates/filesystems/myfs/myfs.c b/tools/labs/templates/filesystems/myfs/myfs.c new file mode 100644 index 00000000000000..9046b4eaf4eff3 --- /dev/null +++ b/tools/labs/templates/filesystems/myfs/myfs.c @@ -0,0 +1,229 @@ +/* + * SO2 Lab - Filesystem drivers + * Exercise #1 (no-dev filesystem) + */ + +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Simple no-dev filesystem"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +#define MYFS_BLOCKSIZE 4096 +#define MYFS_BLOCKSIZE_BITS 12 +#define MYFS_MAGIC 0xbeefcafe +#define LOG_LEVEL KERN_ALERT + +/* declarations of functions that are part of operation structures */ + +static int myfs_mknod(struct inode *dir, + struct dentry *dentry, umode_t mode, dev_t dev); +static int myfs_create(struct inode *dir, struct 
dentry *dentry, + umode_t mode, bool excl); +static int myfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); + +/* TODO 2/4: define super_operations structure */ +static const struct super_operations myfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_drop_inode, +}; + +static const struct inode_operations myfs_dir_inode_operations = { + /* TODO 5/8: Fill dir inode operations structure. */ + .create = myfs_create, + .lookup = simple_lookup, + .link = simple_link, + .unlink = simple_unlink, + .mkdir = myfs_mkdir, + .rmdir = simple_rmdir, + .mknod = myfs_mknod, + .rename = simple_rename, +}; + +static const struct file_operations myfs_file_operations = { + /* TODO 6/4: Fill file operations structure. */ + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, + .mmap = generic_file_mmap, + .llseek = generic_file_llseek, +}; + +static const struct inode_operations myfs_file_inode_operations = { + /* TODO 6/1: Fill file inode operations structure. */ + .getattr = simple_getattr, +}; + +static const struct address_space_operations myfs_aops = { + /* TODO 6/3: Fill address space operations structure. */ + .readpage = simple_readpage, + .write_begin = simple_write_begin, + .write_end = simple_write_end, +}; + +struct inode *myfs_get_inode(struct super_block *sb, const struct inode *dir, + int mode) +{ + struct inode *inode = new_inode(sb); + + if (!inode) + return NULL; + + /* TODO 3/3: fill inode structure + * - mode + * - uid + * - gid + * - atime,ctime,mtime + * - ino + */ + inode_init_owner(inode, dir, mode); + inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_ino = 1; + + /* TODO 5/1: Init i_ino using get_next_ino */ + inode->i_ino = get_next_ino(); + + /* TODO 6/1: Initialize address space operations. */ + inode->i_mapping->a_ops = &myfs_aops; + + if (S_ISDIR(mode)) { + /* TODO 3/2: set inode operations for dir inodes. 
*/ + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* TODO 5/1: use myfs_dir_inode_operations for inode + * operations (i_op). + */ + inode->i_op = &myfs_dir_inode_operations; + + /* TODO 3/1: directory inodes start off with i_nlink == 2 (for "." entry). + * Directory link count should be incremented (use inc_nlink). + */ + inc_nlink(inode); + } + + /* TODO 6/4: Set file inode and file operations for regular files + * (use the S_ISREG macro). + */ + if (S_ISREG(mode)) { + inode->i_op = &myfs_file_inode_operations; + inode->i_fop = &myfs_file_operations; + } + + return inode; +} + +/* TODO 5/33: Implement myfs_mknod, myfs_create, myfs_mkdir. */ +static int myfs_mknod(struct inode *dir, + struct dentry *dentry, umode_t mode, dev_t dev) +{ + struct inode *inode = myfs_get_inode(dir->i_sb, dir, mode); + + if (inode == NULL) + return -ENOSPC; + + d_instantiate(dentry, inode); + dget(dentry); + dir->i_mtime = dir->i_ctime = current_time(inode); + + return 0; +} + +static int myfs_create(struct inode *dir, struct dentry *dentry, + umode_t mode, bool excl) +{ + return myfs_mknod(dir, dentry, mode | S_IFREG, 0); +} + +static int myfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int ret; + + ret = myfs_mknod(dir, dentry, mode | S_IFDIR, 0); + if (ret != 0) + return ret; + + inc_nlink(dir); + + return 0; +} + +/* + * Initialize the superblock of a myfs instance: block size, magic, + * super operations and maximum file size, then create the root + * directory inode (mode 755) and its dentry. Returns 0 on success or + * -ENOMEM if the root inode or the root dentry cannot be allocated. + */ +static int myfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *root_inode; + struct dentry *root_dentry; + + /* TODO 2/5: fill super_block + * - blocksize, blocksize_bits + * - magic + * - super operations + * - maxbytes + */ + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = MYFS_BLOCKSIZE; + sb->s_blocksize_bits = MYFS_BLOCKSIZE_BITS; + sb->s_magic = MYFS_MAGIC; + sb->s_op = &myfs_ops; + + /* mode = directory & access rights (755) */ + root_inode = myfs_get_inode(sb, NULL, + S_IFDIR | S_IRWXU | S_IRGRP | + S_IXGRP | S_IROTH | S_IXOTH); + + 
if (!root_inode) + return -ENOMEM; + + printk(LOG_LEVEL "root inode has %d link(s)\n", root_inode->i_nlink); + + root_dentry = d_make_root(root_inode); + if (!root_dentry) + goto out_no_root; + sb->s_root = root_dentry; + + return 0; + +out_no_root: + /* d_make_root() has already dropped root_inode on failure. */ + return -ENOMEM; +} + +static struct dentry *myfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + /* TODO 1/1: call superblock mount function */ + return mount_nodev(fs_type, flags, data, myfs_fill_super); +} + +/* TODO 1/6: define file_system_type structure */ +static struct file_system_type myfs_fs_type = { + .owner = THIS_MODULE, + .name = "myfs", + .mount = myfs_mount, + .kill_sb = kill_litter_super, +}; + +static int __init myfs_init(void) +{ + int err; + + /* TODO 1/1: register */ + err = register_filesystem(&myfs_fs_type); + if (err) { + printk(LOG_LEVEL "register_filesystem failed\n"); + return err; + } + + return 0; +} + +static void __exit myfs_exit(void) +{ + /* TODO 1/1: unregister */ + unregister_filesystem(&myfs_fs_type); +} + +module_init(myfs_init); +module_exit(myfs_exit); diff --git a/tools/labs/templates/filesystems/myfs/test-myfs-1.sh b/tools/labs/templates/filesystems/myfs/test-myfs-1.sh new file mode 100755 index 00000000000000..26dffe0eb4bfc2 --- /dev/null +++ b/tools/labs/templates/filesystems/myfs/test-myfs-1.sh @@ -0,0 +1,53 @@ +#!/bin/sh + +set -x + +# load module +insmod myfs.ko + +# mount filesystem +mkdir -p /mnt/myfs +mount -t myfs none /mnt/myfs +ls -laid /mnt/myfs + +cd /mnt/myfs + +# create directory +mkdir mydir +ls -la + +# create subdirectory +cd mydir +mkdir mysubdir +ls -lai + +# rename subdirectory +mv mysubdir myrenamedsubdir +ls -lai + +# delete renamed subdirectory +rmdir myrenamedsubdir +ls -la + +# create file +touch myfile +ls -lai + +# rename file +mv myfile myrenamedfile +ls -lai + +# delete renamed file +rm myrenamedfile + +# delete directory +cd .. +rmdir mydir +ls -la + +# unmount filesystem +cd .. 
+umount /mnt/myfs + +# unload module +rmmod myfs diff --git a/tools/labs/templates/filesystems/myfs/test-myfs-2.sh b/tools/labs/templates/filesystems/myfs/test-myfs-2.sh new file mode 100755 index 00000000000000..c038dd1c0b2c66 --- /dev/null +++ b/tools/labs/templates/filesystems/myfs/test-myfs-2.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +set -x + +# load module +insmod myfs.ko + +# mount filesystem +mkdir -p /mnt/myfs +mount -t myfs none /mnt/myfs +ls -laid /mnt/myfs + +cd /mnt/myfs + +# create file +touch myfile +ls -lai + +# rename file +mv myfile myrenamedfile +ls -lai + +# create link to file +ln myrenamedfile mylink +ls -lai + +# read/write file +echo "message" > myrenamedfile +cat myrenamedfile + +# remove link to file +rm mylink +ls -la + +# delete file +rm -f myrenamedfile +ls -la + +# unmount filesystem +cd .. +umount /mnt/myfs + +# unload module +rmmod myfs diff --git a/tools/labs/templates/filesystems/myfs/test-myfs.sh b/tools/labs/templates/filesystems/myfs/test-myfs.sh new file mode 100755 index 00000000000000..e3e63bfc1d3376 --- /dev/null +++ b/tools/labs/templates/filesystems/myfs/test-myfs.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +set -x + +#load module +insmod myfs.ko + +#mount filesystem +mkdir -p /mnt/myfs +mount -t myfs none /mnt/myfs + +#show registered filesystems +cat /proc/filesystems | grep myfs + +#show mounted filesystems +cat /proc/mounts | grep myfs + +#show filesystem statistics +stat -f /mnt/myfs + +#list all filesystem files +cd /mnt/myfs +ls -la + +#unmount filesystem +cd .. 
+umount /mnt/myfs + +#unload module +rmmod myfs diff --git a/tools/labs/templates/generate_skels.py b/tools/labs/templates/generate_skels.py new file mode 100755 index 00000000000000..da70ea33be3b73 --- /dev/null +++ b/tools/labs/templates/generate_skels.py @@ -0,0 +1,68 @@ +#!/usr/bin/python3 -u + +import argparse, fnmatch, glob, os.path, re, sys, shutil + +parser = argparse.ArgumentParser(description='Generate skeletons sources from full sources') +parser.add_argument('paths', metavar='path', nargs='+', help='list of files to process') +parser.add_argument('--output', help='output dir to copy processed files') +parser.add_argument('--todo', type=int, help='don\'t remove TODOs less then this', default=1) +args = parser.parse_args() + +def process_file(p, pattern, end_string=None): + f = open(p, "r") + g = open(os.path.join(args.output, p), "w") + skip_lines = 0 + end_found = True + for l in f.readlines(): + if end_string and end_found == False: + g.write(l) + if end_string in l: + end_found = True + continue + + if skip_lines > 0: + skip_lines -= 1 + m = re.search(pattern, l) + if m : + l = "%s%s%s\n" % (m.group(1), m.group(2), m.group(4)) + g.write(l) + continue + m = re.search(pattern, l) + if m: + todo=1 + if m.group(2): + todo = int(m.group(2)) + if todo >= args.todo: + if m.group(3): + skip_lines = int(m.group(3)) + else: + skip_lines = 1 + + if end_string and end_string not in l: + end_found = False + + l = "%s%s%s\n" % (m.group(1), m.group(2), m.group(4)) + g.write(l) + +for p in args.paths: + print("skel %s" % (p), sep = '') + name=os.path.basename(p) + try: + os.makedirs(os.path.join(args.output, os.path.dirname(p))) + except: + pass + + copy = False + end_string = None + if name == "Kbuild" or name == "Makefile": + pattern="(^#\s*TODO)([0-9]*)\/?([0-9]*)(:.*)" + elif fnmatch.fnmatch(name, '*.c') or fnmatch.fnmatch(name, '*.h'): + pattern="(.*/\*\s*TODO)([ 0-9]*)/?([0-9]*)(:.*)" + end_string = "*/" + else: + copy = True + + if copy: + shutil.copyfile(p, 
os.path.join(args.output, p)) + else: + process_file(p, pattern, end_string) diff --git a/tools/labs/templates/interrupts/Kbuild b/tools/labs/templates/interrupts/Kbuild new file mode 100644 index 00000000000000..20b33f5d968dee --- /dev/null +++ b/tools/labs/templates/interrupts/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = kbd.o diff --git a/tools/labs/templates/interrupts/kbd.c b/tools/labs/templates/interrupts/kbd.c new file mode 100644 index 00000000000000..e049d510746624 --- /dev/null +++ b/tools/labs/templates/interrupts/kbd.c @@ -0,0 +1,277 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("KBD"); +MODULE_AUTHOR("Kernel Hacker"); +MODULE_LICENSE("GPL"); + +#define MODULE_NAME "kbd" + +#define KBD_MAJOR 42 +#define KBD_MINOR 0 +#define KBD_NR_MINORS 1 + +#define I8042_KBD_IRQ 1 +#define I8042_STATUS_REG 0x64 +#define I8042_DATA_REG 0x60 + +#define BUFFER_SIZE 1024 +#define SCANCODE_RELEASED_MASK 0x80 + +struct kbd { + struct cdev cdev; + /* TODO 3: add spinlock */ + spinlock_t lock; + char buf[BUFFER_SIZE]; + size_t put_idx, get_idx, count; +} devs[1]; + +/* + * Checks if scancode corresponds to key press or release. + */ +static int is_key_press(unsigned int scancode) +{ + return !(scancode & SCANCODE_RELEASED_MASK); +} + +/* + * Return the character of the given scancode. + * Only works for alphanumeric/space/enter; returns '?' for other + * characters. 
+ */ +static int get_ascii(unsigned int scancode) +{ + static char *row1 = "1234567890"; + static char *row2 = "qwertyuiop"; + static char *row3 = "asdfghjkl"; + static char *row4 = "zxcvbnm"; + + scancode &= ~SCANCODE_RELEASED_MASK; + if (scancode >= 0x02 && scancode <= 0x0b) + return *(row1 + scancode - 0x02); + if (scancode >= 0x10 && scancode <= 0x19) + return *(row2 + scancode - 0x10); + if (scancode >= 0x1e && scancode <= 0x26) + return *(row3 + scancode - 0x1e); + if (scancode >= 0x2c && scancode <= 0x32) + return *(row4 + scancode - 0x2c); + if (scancode == 0x39) + return ' '; + if (scancode == 0x1c) + return '\n'; + return '?'; +} + +static void put_char(struct kbd *data, char c) +{ + if (data->count >= BUFFER_SIZE) + return; + + data->buf[data->put_idx] = c; + data->put_idx = (data->put_idx + 1) % BUFFER_SIZE; + data->count++; +} + +static bool get_char(char *c, struct kbd *data) +{ + /* TODO 4/6: get char from buffer; update count and get_idx */ + if (data->count > 0) { + *c = data->buf[data->get_idx]; + data->get_idx = (data->get_idx + 1) % BUFFER_SIZE; + data->count--; + return true; + } + return false; +} + +static void reset_buffer(struct kbd *data) +{ + /* TODO 5/3: reset count, put_idx, get_idx */ + data->count = 0; + data->put_idx = 0; + data->get_idx = 0; +} + +/* + * Return the value of the DATA register. + */ +static inline u8 i8042_read_data(void) +{ + u8 val; + /* TODO 3: Read DATA register (8 bits). 
*/ + val = inb(I8042_DATA_REG); + return val; +} + +/* TODO 2/27: implement interrupt handler */ +irqreturn_t kbd_interrupt_handle(int irq_no, void *dev_id) +{ + + unsigned int scancode = 0; + int pressed, ch; + + /* TODO 3: read the scancode */ + scancode = i8042_read_data(); + /* TODO 3/2: interpret the scancode */ + pressed = is_key_press(scancode); + ch = get_ascii(scancode); + + /* TODO 3/2: display information about the keystrokes */ + pr_info("IRQ %d: scancode=0x%x (%u) pressed=%d ch=%c\n", + irq_no, scancode, scancode, pressed, ch); + + /* TODO 3/7: store ASCII key to buffer */ + if (pressed) { + struct kbd *data = (struct kbd *)dev_id; + + spin_lock(&data->lock); + put_char(data, ch); + spin_unlock(&data->lock); + } + + return IRQ_NONE; +} + +static int kbd_open(struct inode *inode, struct file *file) +{ + struct kbd *data = container_of(inode->i_cdev, struct kbd, cdev); + + file->private_data = data; + pr_info("%s opened\n", MODULE_NAME); + return 0; +} + +static int kbd_release(struct inode *inode, struct file *file) +{ + pr_info("%s closed\n", MODULE_NAME); + return 0; +} + +/* TODO 5/12: add write operation and reset the buffer */ +static ssize_t kbd_write(struct file *file, const char __user *user_buffer, + size_t size, loff_t *offset) +{ + struct kbd *data = (struct kbd *) file->private_data; + unsigned long flags; + + spin_lock_irqsave(&data->lock, flags); + reset_buffer(data); + spin_unlock_irqrestore(&data->lock, flags); + + return size; +} + +static ssize_t kbd_read(struct file *file, char __user *user_buffer, + size_t size, loff_t *offset) +{ + struct kbd *data = (struct kbd *) file->private_data; + size_t read = 0; + /* TODO 4/18: read data from buffer */ + unsigned long flags; + char ch; + bool more = true; + + while (size--) { + spin_lock_irqsave(&data->lock, flags); + more = get_char(&ch, data); + spin_unlock_irqrestore(&data->lock, flags); + + if (!more) + break; + + if (put_user(ch, user_buffer++)) + return -EFAULT; + + read++; + } + + 
return read; +} + +static const struct file_operations kbd_fops = { + .owner = THIS_MODULE, + .open = kbd_open, + .release = kbd_release, + .read = kbd_read, + /* TODO 5: add write operation */ + .write = kbd_write, +}; + +static int kbd_init(void) +{ + int err; + + err = register_chrdev_region(MKDEV(KBD_MAJOR, KBD_MINOR), + KBD_NR_MINORS, MODULE_NAME); + if (err != 0) { + pr_err("register_region failed: %d\n", err); + goto out; + } + + /* TODO 1/8: request the keyboard I/O ports */ + if (request_region(I8042_DATA_REG+1, 1, MODULE_NAME) == NULL) { + err = -EBUSY; + goto out_unregister; + } + if (request_region(I8042_STATUS_REG+1, 1, MODULE_NAME) == NULL) { + err = -EBUSY; + goto out_unregister; + } + + /* TODO 3: initialize spinlock */ + spin_lock_init(&devs[0].lock); + + /* TODO 2/7: Register IRQ handler for keyboard IRQ (IRQ 1). */ + err = request_irq(I8042_KBD_IRQ, + kbd_interrupt_handle, + IRQF_SHARED, MODULE_NAME, &devs[0]); + if (err != 0) { + pr_err("request_irq failed: %d\n", err); + goto out_release_regions; + } + + cdev_init(&devs[0].cdev, &kbd_fops); + cdev_add(&devs[0].cdev, MKDEV(KBD_MAJOR, KBD_MINOR), 1); + + pr_notice("Driver %s loaded\n", MODULE_NAME); + return 0; + + /*TODO 2/3: release regions in case of error */ +out_release_regions: + release_region(I8042_STATUS_REG+1, 1); + release_region(I8042_DATA_REG+1, 1); + +out_unregister: + unregister_chrdev_region(MKDEV(KBD_MAJOR, KBD_MINOR), + KBD_NR_MINORS); +out: + return err; +} + +static void kbd_exit(void) +{ + cdev_del(&devs[0].cdev); + + /* TODO 2: Free IRQ. 
*/ + free_irq(I8042_KBD_IRQ, &devs[0]); + + /* TODO 1/2: release keyboard I/O ports */ + release_region(I8042_STATUS_REG+1, 1); + release_region(I8042_DATA_REG+1, 1); + + + unregister_chrdev_region(MKDEV(KBD_MAJOR, KBD_MINOR), + KBD_NR_MINORS); + pr_notice("Driver %s unloaded\n", MODULE_NAME); +} + +module_init(kbd_init); +module_exit(kbd_exit); diff --git a/tools/labs/templates/kernel_api/1-mem/Kbuild b/tools/labs/templates/kernel_api/1-mem/Kbuild new file mode 100644 index 00000000000000..85f6de99faec52 --- /dev/null +++ b/tools/labs/templates/kernel_api/1-mem/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = mem.o diff --git a/tools/labs/templates/kernel_api/1-mem/mem.c b/tools/labs/templates/kernel_api/1-mem/mem.c new file mode 100644 index 00000000000000..a18029a88d2216 --- /dev/null +++ b/tools/labs/templates/kernel_api/1-mem/mem.c @@ -0,0 +1,46 @@ +/* + * Kernel API lab + * + * mem.c - Memory allocation in Linux + */ + +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Print memory"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +static char *mem; + +/* + * Allocate a 4096-byte buffer and log every alphabetic character found + * in it on one log line (pieces after the first use pr_cont()). The + * buffer is not initialized before being read. Returns 0 on success or + * -ENOMEM if the allocation fails. + */ +static int mem_init(void) +{ + size_t i; + + mem = kmalloc(4096 * sizeof(*mem), GFP_KERNEL); + if (mem == NULL) + goto err_mem; + + pr_info("chars: "); + for (i = 0; i < 4096; i++) { + if (isalpha(mem[i])) + pr_cont("%c ", mem[i]); + } + pr_cont("\n"); + + return 0; + +err_mem: + return -ENOMEM; +} + +static void mem_exit(void) +{ + kfree(mem); +} + +module_init(mem_init); +module_exit(mem_exit); diff --git a/tools/labs/templates/kernel_api/2-sched-spin/Kbuild b/tools/labs/templates/kernel_api/2-sched-spin/Kbuild new file mode 100644 index 00000000000000..440138296e63a1 --- /dev/null +++ b/tools/labs/templates/kernel_api/2-sched-spin/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = sched-spin.o diff --git a/tools/labs/templates/kernel_api/2-sched-spin/sched-spin.c 
b/tools/labs/templates/kernel_api/2-sched-spin/sched-spin.c new file mode 100644 index 00000000000000..52fc3d2307c89c --- /dev/null +++ b/tools/labs/templates/kernel_api/2-sched-spin/sched-spin.c @@ -0,0 +1,40 @@ +/* + * Kernel API lab + * + * sched-spin.c: Sleeping in atomic context + */ + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Sleep while atomic"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +static int sched_spin_init(void) +{ + spinlock_t lock; + + spin_lock_init(&lock); + + /* TODO 0/1: Use spin_lock to aquire the lock */ + spin_lock(&lock); + + set_current_state(TASK_INTERRUPTIBLE); + /* Try to sleep for 5 seconds. */ + schedule_timeout(5 * HZ); + + /* TODO 0/1: Use spin_unlock to release the lock */ + spin_unlock(&lock); + + return 0; +} + +static void sched_spin_exit(void) +{ +} + +module_init(sched_spin_init); +module_exit(sched_spin_exit); diff --git a/tools/labs/templates/kernel_api/3-memory/Kbuild b/tools/labs/templates/kernel_api/3-memory/Kbuild new file mode 100644 index 00000000000000..a29f7961d2a9e1 --- /dev/null +++ b/tools/labs/templates/kernel_api/3-memory/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = memory.o diff --git a/tools/labs/templates/kernel_api/3-memory/memory.c b/tools/labs/templates/kernel_api/3-memory/memory.c new file mode 100644 index 00000000000000..1d85b71e2d82e3 --- /dev/null +++ b/tools/labs/templates/kernel_api/3-memory/memory.c @@ -0,0 +1,71 @@ +/* + * SO2 lab3 - task 3 + */ + +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Memory processing"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +struct task_info { + pid_t pid; + unsigned long timestamp; +}; + +static struct task_info *ti1, *ti2, *ti3, *ti4; + +static struct task_info *task_info_alloc(int pid) +{ + struct task_info *ti; + + /* TODO 1/5: allocated and initialize a task_info struct */ + ti = kmalloc(sizeof(*ti), GFP_KERNEL); + if (ti == 
NULL) + return NULL; + ti->pid = pid; + ti->timestamp = jiffies; + + return ti; +} + +static int memory_init(void) +{ + /* TODO 2/1: call task_info_alloc for current pid */ + ti1 = task_info_alloc(current->pid); + + /* TODO 2/1: call task_info_alloc for parent PID */ + ti2 = task_info_alloc(current->parent->pid); + + /* TODO 2/1: call task_info_alloc for next process PID */ + ti3 = task_info_alloc(next_task(current)->pid); + + /* TODO 2/1: call task_info_alloc for the next process of the next process */ + ti4 = task_info_alloc(next_task(next_task(current))->pid); + + return 0; +} + +/* + * Print and free the four task_info structures. Each pointer is checked + * before it is printed because memory_init() does not fail when an + * allocation returns NULL; kfree() itself accepts NULL. + */ +static void memory_exit(void) +{ + + /* TODO 3/8: print ti* field values */ + if (ti1 != NULL) + printk("pid: %d, timestamp: %lu\n", ti1->pid, ti1->timestamp); + if (ti2 != NULL) + printk("pid: %d, timestamp: %lu\n", ti2->pid, ti2->timestamp); + if (ti3 != NULL) + printk("pid: %d, timestamp: %lu\n", ti3->pid, ti3->timestamp); + if (ti4 != NULL) + printk("pid: %d, timestamp: %lu\n", ti4->pid, ti4->timestamp); + + /* TODO 4/4: free ti* structures */ + kfree(ti1); + kfree(ti2); + kfree(ti3); + kfree(ti4); +} + +module_init(memory_init); +module_exit(memory_exit); diff --git a/tools/labs/templates/kernel_api/4-list/Kbuild b/tools/labs/templates/kernel_api/4-list/Kbuild new file mode 100644 index 00000000000000..7187139dbdb7af --- /dev/null +++ b/tools/labs/templates/kernel_api/4-list/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = list.o diff --git a/tools/labs/templates/kernel_api/4-list/list.c b/tools/labs/templates/kernel_api/4-list/list.c new file mode 100644 index 00000000000000..4745b75c59452a --- /dev/null +++ b/tools/labs/templates/kernel_api/4-list/list.c @@ -0,0 +1,100 @@ +/* + * Kernel API lab + * + * list.c: Working with lists + * + */ + +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Use list to process task info"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +struct task_info { + pid_t pid; + unsigned long timestamp; + struct list_head list; +}; + +static 
struct list_head head; + +static struct task_info *task_info_alloc(int pid) +{ + struct task_info *ti; + + ti = kmalloc(sizeof(*ti), GFP_KERNEL); + if (ti == NULL) + return NULL; + ti->pid = pid; + ti->timestamp = jiffies; + + return ti; +} + +static void task_info_add_to_list(int pid) +{ + struct task_info *ti; + + /* TODO 1/2: Allocate task_info and add it to list */ + ti = task_info_alloc(pid); + list_add(&ti->list, &head); +} + +static void task_info_add_for_current(void) +{ + /* Add current, parent, next and next of next to the list */ + task_info_add_to_list(current->pid); + task_info_add_to_list(current->parent->pid); + task_info_add_to_list(next_task(current)->pid); + task_info_add_to_list(next_task(next_task(current))->pid); +} + +static void task_info_print_list(const char *msg) +{ + struct list_head *p; + struct task_info *ti; + + pr_info("%s: [ ", msg); + list_for_each(p, &head) { + ti = list_entry(p, struct task_info, list); + pr_info("(%d, %lu) ", ti->pid, ti->timestamp); + } + pr_info("]\n"); +} + +static void task_info_purge_list(void) +{ + struct list_head *p, *q; + struct task_info *ti; + + /* TODO 2/5: Iterate over the list and delete all elements */ + list_for_each_safe(p, q, &head) { + ti = list_entry(p, struct task_info, list); + list_del(p); + kfree(ti); + } +} + +static int list_init(void) +{ + INIT_LIST_HEAD(&head); + + task_info_add_for_current(); + + return 0; +} + +static void list_exit(void) +{ + task_info_print_list("before exiting"); + task_info_purge_list(); +} + +module_init(list_init); +module_exit(list_exit); diff --git a/tools/labs/templates/kernel_api/5-list-full/Kbuild b/tools/labs/templates/kernel_api/5-list-full/Kbuild new file mode 100644 index 00000000000000..45358ad9ca1503 --- /dev/null +++ b/tools/labs/templates/kernel_api/5-list-full/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = list-full.o diff --git a/tools/labs/templates/kernel_api/5-list-full/list-full.c 
b/tools/labs/templates/kernel_api/5-list-full/list-full.c
new file mode 100644
index 00000000000000..1184fcb1e91478
--- /dev/null
+++ b/tools/labs/templates/kernel_api/5-list-full/list-full.c
@@ -0,0 +1,150 @@
+/*
+ * Kernel API lab
+ *
+ * list-full.c: Working with lists (advanced)
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+MODULE_DESCRIPTION("Full list processing");
+MODULE_AUTHOR("SO2");
+MODULE_LICENSE("GPL");
+
+struct task_info {
+	pid_t pid;
+	unsigned long timestamp;
+	atomic_t count;
+	struct list_head list;
+};
+
+static struct list_head head;
+
+static struct task_info *task_info_alloc(int pid)
+{
+	struct task_info *ti;
+
+	ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+	if (ti == NULL)
+		return NULL;
+	ti->pid = pid;
+	ti->timestamp = jiffies;
+	atomic_set(&ti->count, 0);
+
+	return ti;
+}
+
+static struct task_info *task_info_find_pid(int pid)
+{
+	struct list_head *p;
+	struct task_info *ti;
+
+	/* TODO 1/5: Look for pid and return task_info or NULL if not found */
+	list_for_each(p, &head) {
+		ti = list_entry(p, struct task_info, list);
+		if (ti->pid == pid)
+			return ti;
+	}
+
+	return NULL;
+}
+
+static void task_info_add_to_list(int pid)
+{
+	struct task_info *ti;
+
+	ti = task_info_find_pid(pid);
+	if (ti != NULL) {
+		ti->timestamp = jiffies;
+		atomic_inc(&ti->count);
+		return;
+	}
+
+	ti = task_info_alloc(pid);
+	/* Allocation failed: leave the list unchanged. */
+	if (ti == NULL)
+		return;
+	list_add(&ti->list, &head);
+}
+
+static void task_info_add_for_current(void)
+{
+	task_info_add_to_list(current->pid);
+	task_info_add_to_list(current->parent->pid);
+	task_info_add_to_list(next_task(current)->pid);
+	task_info_add_to_list(next_task(next_task(current))->pid);
+}
+
+static void task_info_print_list(const char *msg)
+{
+	struct list_head *p;
+	struct task_info *ti;
+
+	pr_info("%s: [ ", msg);
+	list_for_each(p, &head) {
+		ti = list_entry(p, struct task_info, list);
+		pr_info("(%d, %lu) ", ti->pid, ti->timestamp);
+	}
+	pr_info("]\n");
+}
+
+static void task_info_remove_expired(void)
+{
+	struct list_head *p, *q;
+	struct task_info *ti;
+
+	list_for_each_safe(p, q, &head) {
+		ti = list_entry(p, struct task_info, list);
+		if (jiffies - ti->timestamp > 3 * HZ && atomic_read(&ti->count) < 5) {
+			list_del(p);
+			kfree(ti);
+		}
+	}
+}
+
+static void task_info_purge_list(void)
+{
+	struct list_head *p, *q;
+	struct task_info *ti;
+
+	list_for_each_safe(p, q, &head) {
+		ti = list_entry(p, struct task_info, list);
+		list_del(p);
+		kfree(ti);
+	}
+}
+
+static int list_full_init(void)
+{
+	INIT_LIST_HEAD(&head);
+
+	task_info_add_for_current();
+	task_info_print_list("after first add");
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	schedule_timeout(5 * HZ);
+
+	return 0;
+}
+
+static void list_full_exit(void)
+{
+	struct task_info *ti;
+
+	/* TODO 2/4: Ensure that at least one task is not deleted */
+	if (!list_empty(&head)) {
+		ti = list_entry(head.prev, struct task_info, list);
+		atomic_set(&ti->count, 10);
+	}
+
+	task_info_remove_expired();
+	task_info_print_list("after removing expired");
+	task_info_purge_list();
+}
+
+module_init(list_full_init);
+module_exit(list_full_exit);
diff --git a/tools/labs/templates/kernel_api/6-list-sync/Kbuild b/tools/labs/templates/kernel_api/6-list-sync/Kbuild
new file mode 100644
index 00000000000000..8105af70665ff9
--- /dev/null
+++ b/tools/labs/templates/kernel_api/6-list-sync/Kbuild
@@ -0,0 +1,3 @@
+ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable
+
+obj-m = list-sync.o
diff --git a/tools/labs/templates/kernel_api/6-list-sync/list-sync.c b/tools/labs/templates/kernel_api/6-list-sync/list-sync.c
new file mode 100644
index 00000000000000..ae837ed4795228
--- /dev/null
+++ b/tools/labs/templates/kernel_api/6-list-sync/list-sync.c
@@ -0,0 +1,182 @@
+/*
+ * Linux API lab
+ *
+ * list-sync.c - Synchronize access to a list
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+MODULE_DESCRIPTION("Full list processing with synchronization");
+MODULE_AUTHOR("SO2");
+MODULE_LICENSE("GPL");
+
+struct task_info {
+	pid_t pid;
+	unsigned long timestamp;
+	atomic_t count;
+	struct list_head list;
+};
+
+static struct list_head head;
+
+/* TODO 1: you can use either a spinlock or rwlock, define it here */
+DEFINE_RWLOCK(lock);
+
+static struct task_info *task_info_alloc(int pid)
+{
+	struct task_info *ti;
+
+	ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+	if (ti == NULL)
+		return NULL;
+	ti->pid = pid;
+	ti->timestamp = jiffies;
+	atomic_set(&ti->count, 0);
+
+	return ti;
+}
+
+static struct task_info *task_info_find_pid(int pid)
+{
+	struct list_head *p;
+	struct task_info *ti;
+
+	list_for_each(p, &head) {
+		ti = list_entry(p, struct task_info, list);
+		if (ti->pid == pid) {
+			return ti;
+		}
+	}
+
+	return NULL;
+}
+
+static void task_info_add_to_list(int pid)
+{
+	struct task_info *ti;
+
+	/* TODO 1: Protect list, is this read or write access? */
+	write_lock(&lock);
+	ti = task_info_find_pid(pid);
+	if (ti != NULL) {
+		ti->timestamp = jiffies;
+		atomic_inc(&ti->count);
+		/* TODO: Guess why this comment was added here */
+		write_unlock(&lock);
+		return;
+	}
+	/* TODO 1: critical section ends here */
+	write_unlock(&lock);
+
+	ti = task_info_alloc(pid);
+	/* Allocation failed: leave the list unchanged. */
+	if (ti == NULL)
+		return;
+	/* TODO 1: protect list access, is this read or write access? */
+	write_lock(&lock);
+	list_add(&ti->list, &head);
+	/* TODO 1: critical section ends here */
+	write_unlock(&lock);
+}
+
+void task_info_add_for_current(void)
+{
+	task_info_add_to_list(current->pid);
+	task_info_add_to_list(current->parent->pid);
+	task_info_add_to_list(next_task(current)->pid);
+	task_info_add_to_list(next_task(next_task(current))->pid);
+}
+/* TODO 2: Export the kernel symbol */
+EXPORT_SYMBOL(task_info_add_for_current);
+
+void task_info_print_list(const char *msg)
+{
+	struct list_head *p;
+	struct task_info *ti;
+
+	pr_info("%s: [ ", msg);
+
+	/* TODO 1: Protect list, is this read or write access? */
+	read_lock(&lock);
+	list_for_each(p, &head) {
+		ti = list_entry(p, struct task_info, list);
+		pr_info("(%d, %lu) ", ti->pid, ti->timestamp);
+	}
+	/* TODO 1: Critical section ends here */
+	read_unlock(&lock);
+	pr_info("]\n");
+}
+/* TODO 2: Export the kernel symbol */
+EXPORT_SYMBOL(task_info_print_list);
+
+void task_info_remove_expired(void)
+{
+	struct list_head *p, *q;
+	struct task_info *ti;
+
+	/* TODO 1: Protect list, is this read or write access? */
+	write_lock(&lock);
+	list_for_each_safe(p, q, &head) {
+		ti = list_entry(p, struct task_info, list);
+		if (jiffies - ti->timestamp > 3 * HZ && atomic_read(&ti->count) < 5) {
+			list_del(p);
+			kfree(ti);
+		}
+	}
+	/* TODO 1: Critical section ends here */
+	write_unlock(&lock);
+}
+/* TODO 2: Export the kernel symbol */
+EXPORT_SYMBOL(task_info_remove_expired);
+
+static void task_info_purge_list(void)
+{
+	struct list_head *p, *q;
+	struct task_info *ti;
+
+	/* TODO 1: Protect list, is this read or write access? */
+	write_lock(&lock);
+	list_for_each_safe(p, q, &head) {
+		ti = list_entry(p, struct task_info, list);
+		list_del(p);
+		kfree(ti);
+	}
+	/* TODO 1: Critical sections ends here */
+	write_unlock(&lock);
+}
+
+static int list_sync_init(void)
+{
+	INIT_LIST_HEAD(&head);
+
+	task_info_add_for_current();
+	task_info_print_list("after first add");
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	schedule_timeout(5 * HZ);
+
+	return 0;
+}
+
+static void list_sync_exit(void)
+{
+	struct task_info *ti;
+
+	/* Keep the tail entry (if any) from being removed as expired. */
+	if (!list_empty(&head)) {
+		ti = list_entry(head.prev, struct task_info, list);
+		atomic_set(&ti->count, 10);
+	}
+
+	task_info_remove_expired();
+	task_info_print_list("after removing expired");
+	task_info_purge_list();
+}
+
+module_init(list_sync_init);
+module_exit(list_sync_exit);
diff --git a/tools/labs/templates/kernel_api/7-list-test/Kbuild b/tools/labs/templates/kernel_api/7-list-test/Kbuild
new file mode 100644
index 00000000000000..324750ee764344
--- /dev/null
+++ b/tools/labs/templates/kernel_api/7-list-test/Kbuild
@@ -0,0
+1,3 @@
+ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable
+
+obj-m = list-test.o
diff --git a/tools/labs/templates/kernel_api/7-list-test/list-test.c b/tools/labs/templates/kernel_api/7-list-test/list-test.c
new file mode 100644
index 00000000000000..e21c5592fd2b70
--- /dev/null
+++ b/tools/labs/templates/kernel_api/7-list-test/list-test.c
@@ -0,0 +1,33 @@
+/*
+ * SO2 lab3 - task 7
+ */
+
+#include
+#include
+#include
+
+MODULE_DESCRIPTION("Test list processing");
+MODULE_AUTHOR("SO2");
+MODULE_LICENSE("GPL");
+
+/* Defined and exported with EXPORT_SYMBOL in 6-list-sync/list-sync.c. */
+extern void task_info_add_for_current(void);
+extern void task_info_remove_expired(void);
+extern void task_info_print_list(const char *msg);
+
+static int list_test_init(void)
+{
+	task_info_add_for_current();
+	task_info_print_list("after new addition");
+
+	return 0;
+}
+
+static void list_test_exit(void)
+{
+	task_info_remove_expired();
+	task_info_print_list("after removing expired");
+}
+
+module_init(list_test_init);
+module_exit(list_test_exit);
diff --git a/tools/labs/templates/kernel_modules/1-2-test-mod/Kbuild b/tools/labs/templates/kernel_modules/1-2-test-mod/Kbuild
new file mode 100644
index 00000000000000..3674ef2591b3d9
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/1-2-test-mod/Kbuild
@@ -0,0 +1,3 @@
+ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable -DDEBUG
+
+obj-m = hello_mod.o
diff --git a/tools/labs/templates/kernel_modules/1-2-test-mod/hello_mod.c b/tools/labs/templates/kernel_modules/1-2-test-mod/hello_mod.c
new file mode 100644
index 00000000000000..0ed6520054f72a
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/1-2-test-mod/hello_mod.c
@@ -0,0 +1,22 @@
+#include
+#include
+#include
+
+MODULE_DESCRIPTION("Simple module");
+MODULE_AUTHOR("Kernel Hacker");
+MODULE_LICENSE("GPL");
+
+/* Both messages use pr_debug(); this module's Kbuild passes -DDEBUG. */
+static int my_hello_init(void)
+{
+	pr_debug("Hello!\n");
+	return 0;
+}
+
+static void hello_exit(void)
+{
+	pr_debug("Goodbye!\n");
+}
+
+module_init(my_hello_init);
+module_exit(hello_exit);
diff --git a/tools/labs/templates/kernel_modules/3-error-mod/Kbuild b/tools/labs/templates/kernel_modules/3-error-mod/Kbuild
new file mode 100644
index 00000000000000..7bf41fb63f9589
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/3-error-mod/Kbuild
@@ -0,0 +1,3 @@
+ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable
+
+obj-m = err_mod.o
diff --git a/tools/labs/templates/kernel_modules/3-error-mod/err_mod.c b/tools/labs/templates/kernel_modules/3-error-mod/err_mod.c
new file mode 100644
index 00000000000000..057036642574af
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/3-error-mod/err_mod.c
@@ -0,0 +1,26 @@
+#include
+#include
+/* TODO: add missing kernel headers */
+#include
+
+MODULE_DESCRIPTION("Error module");
+MODULE_AUTHOR("Kernel Hacker");
+MODULE_LICENSE("GPL");
+
+static int n1, n2;
+
+static int err_init(void)
+{
+	n1 = 1; n2 = 2;
+	pr_info("n1 is %d, n2 is %d\n", n1, n2);
+
+	return 0;
+}
+
+static void err_exit(void)
+{
+	pr_info("sum is %d\n", n1 + n2);
+}
+
+module_init(err_init);
+module_exit(err_exit);
diff --git a/tools/labs/templates/kernel_modules/4-multi-mod/Kbuild b/tools/labs/templates/kernel_modules/4-multi-mod/Kbuild
new file mode 100644
index 00000000000000..1d211ca44c7035
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/4-multi-mod/Kbuild
@@ -0,0 +1,5 @@
+ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable
+
+# TODO/2: add rules to create a multi object module
+obj-m = multi-mod.o
+multi-mod-y = mod1.o mod2.o
diff --git a/tools/labs/templates/kernel_modules/4-multi-mod/mod1.c b/tools/labs/templates/kernel_modules/4-multi-mod/mod1.c
new file mode 100644
index 00000000000000..08511866c460a1
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/4-multi-mod/mod1.c
@@ -0,0 +1,27 @@
+#include
+#include
+#include
+
+MODULE_DESCRIPTION("Multi-file module");
+MODULE_AUTHOR("Kernel Hacker");
+MODULE_LICENSE("GPL");
+
+extern int add(int a, int b);
+
+static int n1, n2;
+
+static int my_hello_init(void)
+{
+	n1 = 1; n2 = 2;
+	pr_info("n1 is %d, n2 is %d\n", n1, n2);
+
+	return 0;
+}
+
+static void hello_exit(void)
+{
+	pr_info("sum is %d\n", add(n1, n2));
+}
+
+module_init(my_hello_init);
+module_exit(hello_exit);
diff --git a/tools/labs/templates/kernel_modules/4-multi-mod/mod2.c b/tools/labs/templates/kernel_modules/4-multi-mod/mod2.c
new file mode 100644
index 00000000000000..7c923bb3ac5488
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/4-multi-mod/mod2.c
@@ -0,0 +1,4 @@
+int add(int a, int b)
+{
+	return a + b;
+}
diff --git a/tools/labs/templates/kernel_modules/5-oops-mod/Kbuild b/tools/labs/templates/kernel_modules/5-oops-mod/Kbuild
new file mode 100644
index 00000000000000..09e3be5e8ab20c
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/5-oops-mod/Kbuild
@@ -0,0 +1,4 @@
+# TODO: add flags to generate debug information
+ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable
+
+obj-m = oops_mod.o
diff --git a/tools/labs/templates/kernel_modules/5-oops-mod/oops_mod.c b/tools/labs/templates/kernel_modules/5-oops-mod/oops_mod.c
new file mode 100644
index 00000000000000..9fd1448572a94a
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/5-oops-mod/oops_mod.c
@@ -0,0 +1,27 @@
+#include
+#include
+#include
+#include
+
+MODULE_DESCRIPTION("Oops generating module");
+MODULE_AUTHOR("So2rul Esforever");
+MODULE_LICENSE("GPL");
+
+static int my_oops_init(void)
+{
+	char *p = 0;
+
+	pr_info("before init\n");
+	*p = 'a';
+	pr_info("after init\n");
+
+	return 0;
+}
+
+static void my_oops_exit(void)
+{
+	pr_info("module goes all out\n");
+}
+
+module_init(my_oops_init);
+module_exit(my_oops_exit);
diff --git a/tools/labs/templates/kernel_modules/6-cmd-mod/Kbuild b/tools/labs/templates/kernel_modules/6-cmd-mod/Kbuild
new file mode 100644
index 00000000000000..2c5fe9cfd4de33
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/6-cmd-mod/Kbuild
@@ -0,0 +1,3 @@
+ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable
+
+obj-m = cmd_mod.o
diff --git a/tools/labs/templates/kernel_modules/6-cmd-mod/cmd_mod.c b/tools/labs/templates/kernel_modules/6-cmd-mod/cmd_mod.c
new file mode 100644
index 00000000000000..3bd758897f3600
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/6-cmd-mod/cmd_mod.c
@@ -0,0 +1,26 @@
+#include
+#include
+#include
+
+MODULE_DESCRIPTION("Command-line args module");
+MODULE_AUTHOR("Kernel Hacker");
+MODULE_LICENSE("GPL");
+
+static char *str = "the worm";
+
+module_param(str, charp, 0000);
+MODULE_PARM_DESC(str, "A simple string");
+
+static int __init cmd_init(void)
+{
+	pr_info("Early bird gets %s\n", str);
+	return 0;
+}
+
+static void __exit cmd_exit(void)
+{
+	pr_info("Exit, stage left\n");
+}
+
+module_init(cmd_init);
+module_exit(cmd_exit);
diff --git a/tools/labs/templates/kernel_modules/7-list-proc/Kbuild b/tools/labs/templates/kernel_modules/7-list-proc/Kbuild
new file mode 100644
index 00000000000000..45eb7676b7ec51
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/7-list-proc/Kbuild
@@ -0,0 +1,3 @@
+ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable
+
+obj-m = list_proc.o
diff --git a/tools/labs/templates/kernel_modules/7-list-proc/list_proc.c b/tools/labs/templates/kernel_modules/7-list-proc/list_proc.c
new file mode 100644
index 00000000000000..96a659bd277f03
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/7-list-proc/list_proc.c
@@ -0,0 +1,37 @@
+#include
+#include
+#include
+/* TODO: add missing headers */
+#include
+
+MODULE_DESCRIPTION("List current processes");
+MODULE_AUTHOR("Kernel Hacker");
+MODULE_LICENSE("GPL");
+
+static int my_proc_init(void)
+{
+	struct task_struct *p;
+
+	/* TODO/2: print current process pid and its name */
+	pr_info("Current process: pid = %d; comm = %s\n",
+		current->pid, current->comm);
+
+	/* TODO/5: print the pid and name of all processes (under RCU) */
+	pr_info("\nProcess list:\n\n");
+	rcu_read_lock();
+	for_each_process(p)
+		pr_info("pid = %d; comm = %s\n", p->pid, p->comm);
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static void my_proc_exit(void)
+{
+	/* TODO/2: print current process pid and name */
+	pr_info("Current process: pid = %d; comm = %s\n",
+		current->pid, current->comm);
+}
+
+module_init(my_proc_init);
+module_exit(my_proc_exit);
diff --git a/tools/labs/templates/kernel_modules/8-kdb/Kbuild b/tools/labs/templates/kernel_modules/8-kdb/Kbuild
new file mode 100644
index 00000000000000..4453b28ab39c4d
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/8-kdb/Kbuild
@@ -0,0 +1,3 @@
+ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable
+
+obj-m = hello_kdb.o
diff --git a/tools/labs/templates/kernel_modules/8-kdb/hello_kdb.c b/tools/labs/templates/kernel_modules/8-kdb/hello_kdb.c
new file mode 100644
index 00000000000000..dedbb96fa4a5fb
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/8-kdb/hello_kdb.c
@@ -0,0 +1,147 @@
+#include
+#include
+#include
+
+int kdb_write_address;
+EXPORT_SYMBOL(kdb_write_address);
+
+/* dummy_func1() .. dummy_func18(): noinline call chain ending in panic(). */
+noinline void dummy_func18(void)
+{
+	panic("Hello KDB has paniced!");
+}
+noinline void dummy_func17(void)
+{
+	dummy_func18();
+}
+noinline void dummy_func16(void)
+{
+	dummy_func17();
+}
+noinline void dummy_func15(void)
+{
+	dummy_func16();
+}
+noinline void dummy_func14(void)
+{
+	dummy_func15();
+}
+noinline void dummy_func13(void)
+{
+	dummy_func14();
+}
+noinline void dummy_func12(void)
+{
+	dummy_func13();
+}
+noinline void dummy_func11(void)
+{
+	dummy_func12();
+}
+noinline void dummy_func10(void)
+{
+	dummy_func11();
+}
+noinline void dummy_func9(void)
+{
+	dummy_func10();
+}
+noinline void dummy_func8(void)
+{
+	dummy_func9();
+}
+noinline void dummy_func7(void)
+{
+	dummy_func8();
+}
+noinline void dummy_func6(void)
+{
+	dummy_func7();
+}
+noinline void dummy_func5(void)
+{
+	dummy_func6();
+}
+noinline void dummy_func4(void)
+{
+	dummy_func5();
+}
+noinline void dummy_func3(void)
+{
+	dummy_func4();
+}
+noinline void dummy_func2(void)
+{
+	dummy_func3();
+}
+noinline void dummy_func1(void)
+{
+	dummy_func2();
+}
+
+static int hello_proc_show(struct seq_file *m, void *v) {
+	seq_printf(m, "Hello proc!\n");
+	return 0;
+}
+
+static int hello_proc_open(struct inode *inode, struct file *file) {
+	return single_open(file, hello_proc_show, NULL);
+}
+
+/* write handler for "hello_kdb_break": bumps kdb_write_address. */
+static ssize_t edit_write(struct file *file, const char __user *buffer,
+		size_t count, loff_t *data)
+{
+	kdb_write_address += 1;
+	return count;
+}
+
+/* write handler for "hello_kdb_bug": calls dummy_func1(), which panics. */
+static ssize_t bug_write(struct file *file, const char __user *buffer,
+		size_t count, loff_t *data)
+{
+	dummy_func1();
+	return count;
+}
+
+static const struct file_operations edit_proc_fops = {
+	.owner = THIS_MODULE,
+	.open = hello_proc_open,
+	.read = seq_read,
+	.write = edit_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static const struct file_operations bug_proc_fops = {
+	.owner = THIS_MODULE,
+	.open = hello_proc_open,
+	.read = seq_read,
+	.write = bug_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int __init hello_proc_init(void) {
+	struct proc_dir_entry *file;
+	file = proc_create("hello_kdb_bug", 0, NULL, &bug_proc_fops);
+	if (file == NULL) {
+		return -ENOMEM;
+	}
+
+	file = proc_create("hello_kdb_break", 0, NULL, &edit_proc_fops);
+	if (file == NULL) {
+		remove_proc_entry("hello_kdb_bug", NULL);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static void __exit hello_proc_exit(void) {
+	remove_proc_entry("hello_kdb_bug", NULL);
+	remove_proc_entry("hello_kdb_break", NULL);
+}
+
+MODULE_LICENSE("GPL");
+module_init(hello_proc_init);
+module_exit(hello_proc_exit);
diff --git a/tools/labs/templates/kernel_modules/9-dyndbg/Kbuild b/tools/labs/templates/kernel_modules/9-dyndbg/Kbuild
new file mode 100644
index 00000000000000..a0d5f3af5202f1
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/9-dyndbg/Kbuild
@@ -0,0 +1,3 @@
+ccflags-y = -Wall -g
+
+obj-m = dyndbg.o
diff --git a/tools/labs/templates/kernel_modules/9-dyndbg/dyndbg.c b/tools/labs/templates/kernel_modules/9-dyndbg/dyndbg.c
new file mode 100644
index
00000000000000..e9071e5a723535 --- /dev/null +++ b/tools/labs/templates/kernel_modules/9-dyndbg/dyndbg.c @@ -0,0 +1,32 @@ +#include +#include +#include + +MODULE_DESCRIPTION("Dyndbg kernel module"); +MODULE_AUTHOR("Dyndbg"); +MODULE_LICENSE("GPL"); + +void my_debug_func(void) +{ + pr_debug("Important dyndbg debug message1\n"); + pr_debug("Important dyndbg debug message2\n"); + pr_debug("Verbose dyndbg debug message\n"); +} +EXPORT_SYMBOL(my_debug_func); + + +static int dyndbg_init(void) +{ + printk(KERN_INFO "Hi dyndbg!\n" ); + my_debug_func(); + return 0; +} + +static void dyndbg_exit(void) +{ + printk(KERN_INFO "Bye dyndbg!\n" ); + my_debug_func(); +} + +module_init(dyndbg_init); +module_exit(dyndbg_exit); diff --git a/tools/labs/templates/memory_mapping/kmmap/Kbuild b/tools/labs/templates/memory_mapping/kmmap/Kbuild new file mode 100644 index 00000000000000..3df7ecec2cab89 --- /dev/null +++ b/tools/labs/templates/memory_mapping/kmmap/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m := kmmap.o diff --git a/tools/labs/templates/memory_mapping/kmmap/kmmap.c b/tools/labs/templates/memory_mapping/kmmap/kmmap.c new file mode 100644 index 00000000000000..b3fccfbaac860c --- /dev/null +++ b/tools/labs/templates/memory_mapping/kmmap/kmmap.c @@ -0,0 +1,229 @@ +/* + * PSO - Memory Mapping Lab(#11) + * + * Exercise #1: memory mapping using kmalloc'd kernel areas + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../test/mmap-test.h" + +MODULE_DESCRIPTION("simple mmap driver"); +MODULE_AUTHOR("PSO"); +MODULE_LICENSE("Dual BSD/GPL"); + +#define MY_MAJOR 42 +/* how many pages do we actually kmalloc */ +#define NPAGES 16 + +/* character device basic structure */ +static struct cdev mmap_cdev; + +/* pointer to kmalloc'd area */ +static void *kmalloc_ptr; + +/* pointer to the kmalloc'd area, rounded 
up to a page boundary */ +static char *kmalloc_area; + +static int my_open(struct inode *inode, struct file *filp) +{ + return 0; +} + +static int my_release(struct inode *inode, struct file *filp) +{ + return 0; +} + +static int my_read(struct file *file, char __user *user_buffer, + size_t size, loff_t *offset) +{ + /* TODO 2/2: check size doesn't exceed our mapped area size */ + if (size > NPAGES * PAGE_SIZE) + size = NPAGES * PAGE_SIZE; + + /* TODO 2/2: copy from mapped area to user buffer */ + if (copy_to_user(user_buffer, kmalloc_area, size)) + return -EFAULT; + + return size; +} + +static int my_write(struct file *file, const char __user *user_buffer, + size_t size, loff_t *offset) +{ + /* TODO 2/2: check size doesn't exceed our mapped area size */ + if (size > NPAGES * PAGE_SIZE) + size = NPAGES * PAGE_SIZE; + + /* TODO 2/3: copy from user buffer to mapped area */ + memset(kmalloc_area, 0, NPAGES * PAGE_SIZE); + if (copy_from_user(kmalloc_area, user_buffer, size)) + return -EFAULT; + + return size; +} + +static int my_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int ret; + long length = vma->vm_end - vma->vm_start; + + /* do not map more than we can */ + if (length > NPAGES * PAGE_SIZE) + return -EIO; + + /* TODO 1/7: map the whole physically contiguous area in one piece */ + ret = remap_pfn_range(vma, vma->vm_start, + virt_to_phys((void *)kmalloc_area) >> PAGE_SHIFT, + length, vma->vm_page_prot); + if (ret < 0) { + pr_err("could not map address area\n"); + return ret; + } + + return 0; +} + +static const struct file_operations mmap_fops = { + .owner = THIS_MODULE, + .open = my_open, + .release = my_release, + .mmap = my_mmap, + .read = my_read, + .write = my_write +}; + +static int my_seq_show(struct seq_file *seq, void *v) +{ + struct mm_struct *mm; + struct vm_area_struct *vma_iterator; + unsigned long total = 0; + + /* TODO 3: Get current process' mm_struct */ + mm = get_task_mm(current); + + /* TODO 3/8: Iterate through all memory mappings 
*/ + vma_iterator = mm->mmap; + while (vma_iterator != NULL) { + pr_info("%lx %lx\n", vma_iterator->vm_start, vma_iterator->vm_end); + total += vma_iterator->vm_end - vma_iterator->vm_start; + vma_iterator = vma_iterator->vm_next; + } + + /* TODO 3: Release mm_struct */ + mmput(mm); + + /* TODO 3: write the total count to file */ + seq_printf(seq, "%lu %s\n", total, current->comm); + return 0; +} + +static int my_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, my_seq_show, NULL); +} + +static const struct file_operations my_proc_file_ops = { + .owner = THIS_MODULE, + .open = my_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init my_init(void) +{ + int ret = 0; + int i; + /* TODO 3/7: create a new entry in procfs */ + struct proc_dir_entry *entry; + + entry = proc_create(PROC_ENTRY_NAME, 0, NULL, &my_proc_file_ops); + if (!entry) { + ret = -ENOMEM; + goto out; + } + + ret = register_chrdev_region(MKDEV(MY_MAJOR, 0), 1, "mymap"); + if (ret < 0) { + pr_err("could not register region\n"); + goto out_no_chrdev; + } + + /* TODO 1/6: allocate NPAGES+2 pages using kmalloc */ + kmalloc_ptr = kmalloc((NPAGES + 2) * PAGE_SIZE, GFP_KERNEL); + if (kmalloc_ptr == NULL) { + ret = -ENOMEM; + pr_err("could not allocate memory\n"); + goto out_unreg; + } + + /* TODO 1: round kmalloc_ptr to nearest page start address */ + kmalloc_area = (char *) PAGE_ALIGN(((unsigned long)kmalloc_ptr)); + + /* TODO 1/2: mark pages as reserved */ + for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE) + SetPageReserved(virt_to_page(((unsigned long)kmalloc_area)+i)); + + /* TODO 1/6: write data in each page */ + for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE) { + kmalloc_area[i] = 0xaa; + kmalloc_area[i + 1] = 0xbb; + kmalloc_area[i + 2] = 0xcc; + kmalloc_area[i + 3] = 0xdd; + } + + /* Init device. 
*/ + cdev_init(&mmap_cdev, &mmap_fops); + ret = cdev_add(&mmap_cdev, MKDEV(MY_MAJOR, 0), 1); + if (ret < 0) { + pr_err("could not add device\n"); + goto out_kfree; + } + + return 0; + +out_kfree: + kfree(kmalloc_ptr); +out_unreg: + unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1); +out_no_chrdev: + remove_proc_entry(PROC_ENTRY_NAME, NULL); +out: + return ret; +} + +static void __exit my_exit(void) +{ + int i; + + cdev_del(&mmap_cdev); + + /* TODO 1/3: clear reservation on pages and free mem. */ + for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE) + ClearPageReserved(virt_to_page(((unsigned long)kmalloc_area)+i)); + kfree(kmalloc_ptr); + + unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1); + /* TODO 3: remove proc entry */ + remove_proc_entry(PROC_ENTRY_NAME, NULL); +} + +module_init(my_init); +module_exit(my_exit); diff --git a/tools/labs/templates/memory_mapping/test/.gitignore b/tools/labs/templates/memory_mapping/test/.gitignore new file mode 100644 index 00000000000000..2a2fb151cce3b2 --- /dev/null +++ b/tools/labs/templates/memory_mapping/test/.gitignore @@ -0,0 +1 @@ +/mmap-test diff --git a/tools/labs/templates/memory_mapping/test/Makefile b/tools/labs/templates/memory_mapping/test/Makefile new file mode 100644 index 00000000000000..8639d2a7d45f53 --- /dev/null +++ b/tools/labs/templates/memory_mapping/test/Makefile @@ -0,0 +1,9 @@ +CFLAGS=-Wall -m32 +LDFLAGS=-static -m32 + +mmap-test: mmap-test.o + +.PHONY: clean + +clean: + -rm -f *~ *.o mmap-test diff --git a/tools/labs/templates/memory_mapping/test/mmap-test.c b/tools/labs/templates/memory_mapping/test/mmap-test.c new file mode 100644 index 00000000000000..a4aa5669149d1e --- /dev/null +++ b/tools/labs/templates/memory_mapping/test/mmap-test.c @@ -0,0 +1,173 @@ +/* + * PSO - Memory Mapping Lab (#11) + * + * Exercise #1, #2: memory mapping between user-space and kernel-space + * + * test case + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
"mmap-test.h" + +#define NPAGES 16 +#define MMAP_DEV "/dev/mymmap" +#define PROC_ENTRY_PATH "/proc/" PROC_ENTRY_NAME + +void test_contents(unsigned char *addr, + unsigned char value1, unsigned char value2, + unsigned char value3, unsigned char value4) +{ + int i; + + for (i = 0; i < NPAGES * getpagesize(); i += getpagesize()) { + if (addr[i] != value1 || addr[i + 1] != value2 || + addr[i + 2] != value3 || addr[i + 3] != value4) + printf("0x%x 0x%x 0x%x 0x%x\n", addr[i], addr[i+1], + addr[i+2], addr[i+3]); + else + printf("matched\n"); + } +} + +int test_read_write(int fd, unsigned char *mmap_addr) +{ + unsigned char *local_addr; + int len = NPAGES * getpagesize(), i; + + printf("\nWrite test ...\n"); + /* alloc local memory */ + local_addr = malloc(len); + if (!local_addr) + return -1; + + /* init local memory */ + memset(local_addr, 0, len); + for (i = 0; i < NPAGES * getpagesize(); i += getpagesize()) { + local_addr[i] = 0xa0; + local_addr[i+1] = 0xb0; + local_addr[i+2] = 0xc0; + local_addr[i+3] = 0xd0; + } + + /* write to device */ + write(fd, local_addr, len); + + /* are these values in mapped memory? */ + test_contents(mmap_addr, 0xa0, 0xb0, 0xc0, 0xd0); + + printf("\nRead test ...\n"); + memset(local_addr, 0, len); + /* read from device */ + read(fd, local_addr, len); + /* are the values read correct? 
*/
+	test_contents(local_addr, 0xa0, 0xb0, 0xc0, 0xd0);
+
+	/* the scratch buffer is no longer needed */
+	free(local_addr);
+	return 0;
+}
+
+/*
+ * Read PROC_ENTRY_PATH, parse the leading unsigned decimal number and print
+ * it as "Memory usage". Returns that number (converted to int), or a
+ * negative value if the entry cannot be opened or read.
+ */
+static int show_mem_usage(void)
+{
+	int fd, ret;
+	char buf[40];
+	unsigned long mem_usage = 0;
+
+	fd = open(PROC_ENTRY_PATH, O_RDONLY);
+	if (fd < 0) {
+		perror("open " PROC_ENTRY_PATH);
+		ret = fd;
+		goto out;
+	}
+
+	/* keep one byte free for the terminating NUL */
+	ret = read(fd, buf, sizeof(buf) - 1);
+	if (ret < 0)
+		goto no_read;
+
+	/* terminate the text before sscanf() looks at it */
+	buf[ret] = 0;
+	sscanf(buf, "%lu", &mem_usage);
+
+	printf("Memory usage: %lu\n", mem_usage);
+
+	ret = mem_usage;
+no_read:
+	close(fd);
+out:
+	return ret;
+}
+
+/*
+ * Test driver. Creates the MMAP_DEV node, maps NPAGES pages of it and
+ * checks every page for the 0xaa 0xbb 0xcc 0xdd marker. The optional first
+ * argument selects extra steps: >= 2 also runs test_read_write(), >= 3 also
+ * compares show_mem_usage() before and after a 10 MB anonymous mapping.
+ * The device node is removed again before exiting.
+ */
+int main(int argc, const char **argv)
+{
+	/* without an argument only the basic mmap check runs */
+	int fd, test = 0;
+	unsigned char *addr;
+	int len = NPAGES * getpagesize();
+	int i;
+	/* signed, so a negative return from show_mem_usage() is detectable */
+	long usage_before_mmap, usage_after_mmap;
+
+	if (argc > 1)
+		test = atoi(argv[1]);
+
+	assert(system("mknod " MMAP_DEV " c 42 0") == 0);
+
+	fd = open(MMAP_DEV, O_RDWR | O_SYNC);
+	if (fd < 0) {
+		perror("open");
+		assert(system("rm " MMAP_DEV) == 0);
+		exit(EXIT_FAILURE);
+	}
+
+	addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		assert(system("rm " MMAP_DEV) == 0);
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = 0; i < NPAGES * getpagesize(); i += getpagesize()) {
+		if (addr[i] != 0xaa || addr[i + 1] != 0xbb ||
+		    addr[i + 2] != 0xcc || addr[i + 3] != 0xdd)
+			printf("0x%x 0x%x 0x%x 0x%x\n", addr[i], addr[i+1],
+					addr[i+2], addr[i+3]);
+		else
+			printf("matched\n");
+	}
+
+
+	if (test >= 2 && test_read_write(fd, addr)) {
+		perror("read/write test");
+		assert(system("rm " MMAP_DEV) == 0);
+		exit(EXIT_FAILURE);
+	}
+
+	if (test >= 3) {
+		usage_before_mmap = show_mem_usage();
+		if (usage_before_mmap < 0)
+			printf("failed to show memory usage\n");
+
+		#define SIZE (10 * 1024 * 1024)
+		addr = mmap(NULL, SIZE, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+		if (addr == MAP_FAILED)
+			perror("mmap_");
+
+		usage_after_mmap = show_mem_usage();
+		if (usage_after_mmap < 0)
+			printf("failed to show memory usage\n");
+		printf("mmaped :%lu MB\n",
+		       (unsigned long)(usage_after_mmap - usage_before_mmap) >> 20);
+
+		sleep(30);
+
+		/* nothing to unmap if the anonymous mapping failed */
+		if (addr != MAP_FAILED)
+			munmap(addr, SIZE);
+	}
+
+	close(fd);
+
+	assert(system("rm " MMAP_DEV) == 0);
+
+	return 0;
+}
diff --git a/tools/labs/templates/memory_mapping/test/mmap-test.h b/tools/labs/templates/memory_mapping/test/mmap-test.h
new file mode 100644
index 00000000000000..8d98f57e319240
--- /dev/null
+++ b/tools/labs/templates/memory_mapping/test/mmap-test.h
@@ -0,0 +1,6 @@
+#ifndef __SO2MMAP_H__
+#define __SO2MMAP_H__ 1
+
+#define PROC_ENTRY_NAME "my-proc-entry"
+
+#endif
diff --git a/tools/labs/templates/memory_mapping/vmmap/Kbuild b/tools/labs/templates/memory_mapping/vmmap/Kbuild
new file mode 100644
index 00000000000000..eaf763a4a9d505
--- /dev/null
+++ b/tools/labs/templates/memory_mapping/vmmap/Kbuild
@@ -0,0 +1,3 @@
+ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable
+
+obj-m := vmmap.o
diff --git a/tools/labs/templates/memory_mapping/vmmap/vmmap.c b/tools/labs/templates/memory_mapping/vmmap/vmmap.c
new file mode 100644
index 00000000000000..68adc662be572e
--- /dev/null
+++ b/tools/labs/templates/memory_mapping/vmmap/vmmap.c
@@ -0,0 +1,226 @@
+/*
+ * PSO - Memory Mapping Lab(#11)
+ *
+ * Exercise #2: memory mapping using vmalloc'd kernel areas
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../test/mmap-test.h"
+
+
+MODULE_DESCRIPTION("simple mmap driver");
+MODULE_AUTHOR("PSO");
+MODULE_LICENSE("Dual BSD/GPL");
+
+#define MY_MAJOR 42
+
+/* how many pages do we actually vmalloc */
+#define NPAGES 16
+
+/* character device basic structure */
+static struct cdev mmap_cdev;
+
+/* pointer to the vmalloc'd area, rounded up to a page boundary */
+static char *vmalloc_area;
+
+/* open handler: nothing to set up, always succeeds */
+static int my_open(struct inode *inode, struct file *filp)
+{
+	return 0;
+}
+
+/* release handler: nothing to tear down, always succeeds */
+static int my_release(struct inode *inode, struct file *filp)
+{
+	return 0;
+}
+
+static ssize_t my_read(struct file *file, char __user *user_buffer,
+		size_t size, 
loff_t *offset)
+{
+	/* TODO 2/2: check size doesn't exceed our mapped area size */
+	if (size > NPAGES * PAGE_SIZE)
+		size = NPAGES * PAGE_SIZE;
+
+	/* TODO 2/2: copy from mapped area to user buffer */
+	if (copy_to_user(user_buffer, vmalloc_area, size))
+		return -EFAULT;
+
+	return size;
+}
+
+/*
+ * write handler: clears the whole area, then copies up to NPAGES pages from
+ * user_buffer to its start. *offset is not used. Returns the number of
+ * bytes accepted or -EFAULT.
+ */
+static ssize_t my_write(struct file *file, const char __user *user_buffer,
+		size_t size, loff_t *offset)
+{
+	/* TODO 2/2: check size doesn't exceed our mapped area size */
+	if (size > NPAGES * PAGE_SIZE)
+		size = NPAGES * PAGE_SIZE;
+
+	/* TODO 2/3: copy from user buffer to mapped area */
+	memset(vmalloc_area, 0, NPAGES * PAGE_SIZE);
+	if (copy_from_user(vmalloc_area, user_buffer, size))
+		return -EFAULT;
+
+	return size;
+}
+
+/*
+ * mmap handler: maps the vmalloc'd area into the caller's address space,
+ * one page at a time, looking each page up with vmalloc_to_pfn(). Requests
+ * larger than the NPAGES-page area are rejected with -EIO. Pages are mapped
+ * with the protection computed for the VMA, so a PROT_READ mapping stays
+ * read-only.
+ */
+static int my_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	int ret;
+	long length = vma->vm_end - vma->vm_start;
+	unsigned long start = vma->vm_start;
+	char *vmalloc_area_ptr = vmalloc_area;
+	unsigned long pfn;
+
+	if (length > NPAGES * PAGE_SIZE)
+		return -EIO;
+
+	/* TODO 1/9: map pages individually */
+	while (length > 0) {
+		pfn = vmalloc_to_pfn(vmalloc_area_ptr);
+		ret = remap_pfn_range(vma, start, pfn, PAGE_SIZE, vma->vm_page_prot);
+		if (ret < 0)
+			return ret;
+		start += PAGE_SIZE;
+		vmalloc_area_ptr += PAGE_SIZE;
+		length -= PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+static const struct file_operations mmap_fops = {
+	.owner = THIS_MODULE,
+	.open = my_open,
+	.release = my_release,
+	.mmap = my_mmap,
+	.read = my_read,
+	.write = my_write
+};
+
+/*
+ * seq_file show callback for the proc entry: logs the start/end of every
+ * mapping of the current process and writes "<total bytes> <comm>" to the
+ * file. The mapping list is walked with mmap_sem held for reading so it
+ * cannot change underneath the loop.
+ */
+static int my_seq_show(struct seq_file *seq, void *v)
+{
+	struct mm_struct *mm;
+	struct vm_area_struct *vma_iterator;
+	unsigned long total = 0;
+
+	/* TODO 3/3: Get current process' mm_struct */
+	mm = get_task_mm(current);
+	if (!mm)
+		return -ESRCH;
+
+	/* TODO 3/8: Iterate through all memory mappings and print ranges */
+	down_read(&mm->mmap_sem);
+	vma_iterator = mm->mmap;
+	while (vma_iterator != NULL) {
+		pr_info("%lx %lx\n", vma_iterator->vm_start, vma_iterator->vm_end);
+		total += vma_iterator->vm_end - vma_iterator->vm_start;
+		vma_iterator = vma_iterator->vm_next;
+	}
+	up_read(&mm->mmap_sem);
+
+	/* TODO 3: Release mm_struct */
+	mmput(mm);
+
+	/* TODO 3: write the total count to file */
+	seq_printf(seq, "%lu %s\n", total, current->comm);
+	return 0;
+}
+
+/* open handler of the proc entry: single-record seq_file using my_seq_show */
+static int my_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, my_seq_show, NULL);
+}
+
+static const struct file_operations my_proc_file_ops = {
+	.owner = THIS_MODULE,
+	.open = my_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ * Module init: creates the proc entry, registers the char device region,
+ * vmallocs NPAGES pages, marks them reserved, writes 0xaa 0xbb 0xcc 0xdd at
+ * the start of each page and adds the cdev. Every step is undone in reverse
+ * order if a later one fails.
+ */
+static int __init my_init(void)
+{
+	int ret = 0;
+	int i;
+	/* TODO 3/7: create a new entry in procfs */
+	struct proc_dir_entry *entry;
+
+	entry = proc_create(PROC_ENTRY_NAME, 0, NULL, &my_proc_file_ops);
+	if (!entry) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = register_chrdev_region(MKDEV(MY_MAJOR, 0), 1, "mymap");
+	if (ret < 0) {
+		pr_err("could not register region\n");
+		goto out_no_chrdev;
+	}
+
+	/* TODO 1/6: allocate NPAGES using vmalloc */
+	vmalloc_area = (char *)vmalloc(NPAGES * PAGE_SIZE);
+	if (vmalloc_area == NULL) {
+		ret = -ENOMEM;
+		pr_err("could not allocate memory\n");
+		goto out_unreg;
+	}
+
+	/* TODO 1/2: mark pages as reserved */
+	for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(vmalloc_area+i));
+
+	/* TODO 1/6: write data in each page */
+	for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE) {
+		vmalloc_area[i] = 0xaa;
+		vmalloc_area[i + 1] = 0xbb;
+		vmalloc_area[i + 2] = 0xcc;
+		vmalloc_area[i + 3] = 0xdd;
+	}
+
+	cdev_init(&mmap_cdev, &mmap_fops);
+	ret = cdev_add(&mmap_cdev, MKDEV(MY_MAJOR, 0), 1);
+	if (ret < 0) {
+		pr_err("could not add device\n");
+		goto out_vfree;
+	}
+
+	return 0;
+
+out_vfree:
+	/* TODO 1/2: clear reservation on pages before freeing them */
+	for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(vmalloc_area+i));
+	vfree(vmalloc_area);
+out_unreg:
+	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1);
+out_no_chrdev:
+	remove_proc_entry(PROC_ENTRY_NAME, NULL);
+out:
+	return ret;
+}
+
+/* Module exit: undoes everything my_init() set up. */
+static void __exit my_exit(void)
+{
+	int i;
+
+	cdev_del(&mmap_cdev);
+
+	/* TODO 1/3: clear reservation on pages and free mem.*/
+	for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(vmalloc_area+i));
+	vfree(vmalloc_area);
+
+	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1);
+	/* TODO 3: remove proc entry */
+	remove_proc_entry(PROC_ENTRY_NAME, NULL);
+}
+
+module_init(my_init);
+module_exit(my_exit);
diff --git a/tools/labs/templates/networking/1-2-netfilter/kernel/Kbuild b/tools/labs/templates/networking/1-2-netfilter/kernel/Kbuild
new file mode 100644
index 00000000000000..8d831f886a624c
--- /dev/null
+++ b/tools/labs/templates/networking/1-2-netfilter/kernel/Kbuild
@@ -0,0 +1,3 @@
+EXTRA_CFLAGS = -g
+
+obj-m = filter.o
diff --git a/tools/labs/templates/networking/1-2-netfilter/kernel/filter.c b/tools/labs/templates/networking/1-2-netfilter/kernel/filter.c
new file mode 100644
index 00000000000000..91850db9aad3c5
--- /dev/null
+++ b/tools/labs/templates/networking/1-2-netfilter/kernel/filter.c
@@ -0,0 +1,163 @@
+/*
+ * SO2 - Networking Lab (#10)
+ *
+ * Exercise #1, #2: simple netfilter module
+ *
+ * Code skeleton.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "filter.h"
+
+MODULE_DESCRIPTION("Simple netfilter module");
+MODULE_AUTHOR("SO2");
+MODULE_LICENSE("GPL");
+
+#define LOG_LEVEL KERN_ALERT
+#define MY_DEVICE "filter"
+
+static struct cdev my_cdev;
+static atomic_t ioctl_set;
+static unsigned int ioctl_set_addr;
+
+
+/* Test ioctl_set_addr if it has been set. 
+ *
+ * Returns 1 when no filter address has been configured yet (every
+ * destination matches) or when dst_addr equals the configured address,
+ * and 0 otherwise.
+ */
+static int test_daddr(unsigned int dst_addr)
+{
+	int ret = 0;
+
+	/* TODO 2/4: return non-zero if address has been set
+	 * *and* matches dst_addr
+	 */
+	if (atomic_read(&ioctl_set) == 1)
+		ret = (ioctl_set_addr == dst_addr);
+	else
+		ret = 1;
+
+	return ret;
+}
+
+/* TODO 1/20: netfilter hook function */
+static unsigned int my_nf_hookfn(void *priv,
+		struct sk_buff *skb,
+		const struct nf_hook_state *state)
+{
+	/* get IP header */
+	struct iphdr *iph = ip_hdr(skb);
+
+	if (iph->protocol == IPPROTO_TCP && test_daddr(iph->daddr)) {
+		/* get TCP header */
+		struct tcphdr *tcph = tcp_hdr(skb);
+		/* test for connection initiating packet */
+		if (tcph->syn && !tcph->ack)
+			printk(LOG_LEVEL "TCP connection initiated from "
+				"%pI4:%u\n",
+				&iph->saddr, ntohs(tcph->source));
+	}
+
+	/* let the packet pass */
+	return NF_ACCEPT;
+}
+
+/* open handler: nothing to set up, always succeeds */
+static int my_open(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+/* release handler: nothing to tear down, always succeeds */
+static int my_close(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+/*
+ * ioctl handler. MY_IOCTL_FILTER_ADDRESS copies an unsigned int from the
+ * user pointer in arg into ioctl_set_addr and marks the filter as set.
+ * Returns 0 on success, -EFAULT if the copy fails, -ENOTTY for any other
+ * command.
+ */
+static long my_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case MY_IOCTL_FILTER_ADDRESS:
+		/* TODO 2/4: set filter address from arg */
+		if (copy_from_user(&ioctl_set_addr, (void __user *) arg,
+				   sizeof(ioctl_set_addr)))
+			return -EFAULT;
+		atomic_set(&ioctl_set, 1);
+		break;
+
+	default:
+		return -ENOTTY;
+	}
+
+	return 0;
+}
+
+static const struct file_operations my_fops = {
+	.owner = THIS_MODULE,
+	.open = my_open,
+	.release = my_close,
+	.unlocked_ioctl = my_ioctl
+};
+
+/* TODO 1/6: define netfilter hook operations structure */
+static struct nf_hook_ops my_nfho = {
+	.hook = my_nf_hookfn,
+	.hooknum = NF_INET_LOCAL_OUT,
+	.pf = PF_INET,
+	.priority = NF_IP_PRI_FIRST
+};
+
+/*
+ * Module init: registers the char device region, resets the filter state,
+ * adds the cdev and registers the netfilter hook. Returns 0 on success or
+ * the error of the step that failed, after undoing the earlier steps.
+ */
+int __init my_hook_init(void)
+{
+	int err;
+
+	/* register filter device */
+	err = register_chrdev_region(MKDEV(MY_MAJOR, 0), 1, MY_DEVICE);
+	if (err != 0)
+		return err;
+
+	atomic_set(&ioctl_set, 0);
+	ioctl_set_addr = 0;
+
+	/* init & add device */
+	cdev_init(&my_cdev, &my_fops);
+	err = cdev_add(&my_cdev, MKDEV(MY_MAJOR, 0), 1);
+	if (err)
+		goto out_unreg;
+
+	/* TODO 1/3: register netfilter hook */
+	err = nf_register_net_hook(&init_net, &my_nfho);
+	if (err)
+		goto out;
+
+	return 0;
+
+out:
+	/* cleanup */
+	cdev_del(&my_cdev);
+out_unreg:
+	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1);
+
+	return err;
+}
+
+/* Module exit: unregisters the hook, then removes the char device. */
+void __exit my_hook_exit(void)
+{
+	/* TODO 1/1: unregister hook */
+	nf_unregister_net_hook(&init_net, &my_nfho);
+
+	/* cleanup device */
+	cdev_del(&my_cdev);
+	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1);
+}
+
+module_init(my_hook_init);
+module_exit(my_hook_exit);
diff --git a/tools/labs/templates/networking/1-2-netfilter/kernel/filter.h b/tools/labs/templates/networking/1-2-netfilter/kernel/filter.h
new file mode 100644
index 00000000000000..ad2f73c9fd000a
--- /dev/null
+++ b/tools/labs/templates/networking/1-2-netfilter/kernel/filter.h
@@ -0,0 +1,11 @@
+#ifndef _FILTER_H_
+#define _FILTER_H_
+
+#include
+
+/* ioctl command to pass address to filter driver */
+#define MY_IOCTL_FILTER_ADDRESS _IOW('k', 1, unsigned int)
+
+#define MY_MAJOR 42
+
+#endif /* _FILTER_H_ */
diff --git a/tools/labs/templates/networking/1-2-netfilter/user/.gitignore b/tools/labs/templates/networking/1-2-netfilter/user/.gitignore
new file mode 100644
index 00000000000000..ee4c92682341e4
--- /dev/null
+++ b/tools/labs/templates/networking/1-2-netfilter/user/.gitignore
@@ -0,0 +1 @@
+/test
diff --git a/tools/labs/templates/networking/1-2-netfilter/user/Makefile b/tools/labs/templates/networking/1-2-netfilter/user/Makefile
new file mode 100644
index 00000000000000..0d5af50006725c
--- /dev/null
+++ b/tools/labs/templates/networking/1-2-netfilter/user/Makefile
@@ -0,0 +1,16 @@
+#
+# SO2 - Networking Lab (#10)
+#
+# Makefile for test filter module
+#
+
+CFLAGS = -Wall -static -m32
+
+all: test
+
+test: test.c
+
+.PHONY: clean
+
+clean:
+	-rm -f test *~ *.o
diff --git a/tools/labs/templates/networking/1-2-netfilter/user/test-1.sh 
b/tools/labs/templates/networking/1-2-netfilter/user/test-1.sh new file mode 100755 index 00000000000000..d78c482564c7ee --- /dev/null +++ b/tools/labs/templates/networking/1-2-netfilter/user/test-1.sh @@ -0,0 +1,25 @@ +#!/bin/sh +# +# SO2 - Networking Lab (#10) +# +# Test script for exercise #1 +# + +# insert module +insmod ../kernel/filter.ko || exit 1 + +# listen for connections on localhost, port 60000 (run in background) +../../netcat -l -p 60000 & + +# wait for netcat to start listening +sleep 1 + +# connect to localhost, port 60000; no local port is requested here +# (netcat is run without -p) +echo "Should show up in filter." | ../../netcat -q 2 127.0.0.1 60000 + +# look for filter message in dmesg output +echo "Check dmesg output." + +# remove module +rmmod filter || exit 1 diff --git a/tools/labs/templates/networking/1-2-netfilter/user/test-2.sh b/tools/labs/templates/networking/1-2-netfilter/user/test-2.sh new file mode 100755 index 00000000000000..37d07cedb74ace --- /dev/null +++ b/tools/labs/templates/networking/1-2-netfilter/user/test-2.sh @@ -0,0 +1,41 @@ +#!/bin/sh +# +# SO2 - Networking Lab (#10) +# +# Test script for exercise #2 +# + +# insert module +insmod ../kernel/filter.ko || exit 1 + +# set filter IP address to 127.0.0.1 +./test 127.0.0.1 + +# listen for connections on localhost, port 60000 (run in background) +../../netcat -l -p 60000 & + +# wait for netcat to start listening +sleep 1 + +# connect to localhost, port 60000; no local port is requested here +# (netcat is run without -p) +echo "Should show up in filter." | ../../netcat -q 2 127.0.0.1 60000 + +# set filter IP address to 127.0.0.2 +./test 127.0.0.2 + +# listen for connections on localhost, port 60000 (run in background) +../../netcat -l -p 60000 & + +# wait for netcat to start listening +sleep 1 + +# connect to localhost, port 60000; no local port is requested here +# (netcat is run without -p) +echo "Should NOT show up in filter." 
| ../../netcat -q 2 127.0.0.1 60000
+
+# look for filter message in dmesg output
+echo "Check dmesg output."
+
+# remove module
+rmmod filter || exit 1
diff --git a/tools/labs/templates/networking/1-2-netfilter/user/test.c b/tools/labs/templates/networking/1-2-netfilter/user/test.c
new file mode 100644
index 00000000000000..775edb458286c7
--- /dev/null
+++ b/tools/labs/templates/networking/1-2-netfilter/user/test.c
@@ -0,0 +1,73 @@
+/*
+ * SO2 - Networking Lab (#10)
+ *
+ * Test filter module for exercise #2
+ *
+ * Sends MY_IOCTL_FILTER_ADDRESS to filter module.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../kernel/filter.h"
+
+#define MY_DEVICE "/dev/filter"
+
+
+/* Print the command-line synopsis to stderr; argv0 is the program name. */
+static void print_usage(char *argv0)
+{
+	fprintf(stderr, "Usage: %s <address>\n"
+			"\taddress must be a string containing "
+			"an IP dotted address\n", argv0);
+}
+
+/*
+ * Parse the dotted IPv4 address given as the only argument, create the
+ * MY_DEVICE node (an already existing node is accepted), send the address
+ * to the filter module with MY_IOCTL_FILTER_ADDRESS and remove the node
+ * again. Exits with EXIT_FAILURE on a usage error, an unparsable address,
+ * or a failed mknod/open/ioctl; a failed close/unlink is only reported.
+ */
+int main(int argc, char **argv)
+{
+	int fd;
+	int ret = EXIT_SUCCESS;
+	unsigned int addr;
+
+	if (argc != 2) {
+		print_usage(argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	/* get address; inet_addr() reports a malformed string as INADDR_NONE */
+	addr = inet_addr(argv[1]);
+	if (addr == INADDR_NONE) {
+		print_usage(argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	/* make device node */
+	if (mknod(MY_DEVICE, 0644 | S_IFCHR, makedev(MY_MAJOR, 0)) < 0) {
+		if (errno != EEXIST) {
+			perror("mknod " MY_DEVICE);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	/* open device */
+	fd = open(MY_DEVICE, O_RDONLY);
+	if (fd < 0) {
+		perror("open " MY_DEVICE);
+		ret = EXIT_FAILURE;
+	} else {
+		/* send ioctl */
+		if (ioctl(fd, MY_IOCTL_FILTER_ADDRESS, &addr) < 0) {
+			perror("ioctl MY_IOCTL_FILTER_ADDRESS");
+			ret = EXIT_FAILURE;
+		}
+
+		/* close device */
+		if (close(fd) < 0)
+			perror("close");
+	}
+
+	/* cleanup device node */
+	if (unlink(MY_DEVICE) < 0)
+		perror("unlink " MY_DEVICE);
+
+	return ret;
+}
diff --git a/tools/labs/templates/networking/3-4-tcp-sock/Kbuild b/tools/labs/templates/networking/3-4-tcp-sock/Kbuild
new file mode 100644
index 00000000000000..fa55ec98e71d78
--- /dev/null
+++ b/tools/labs/templates/networking/3-4-tcp-sock/Kbuild
@@ -0,0 +1,3 @@
+EXTRA_CFLAGS = -Wall -g
+
+obj-m = tcp_sock.o
diff --git a/tools/labs/templates/networking/3-4-tcp-sock/tcp_sock.c b/tools/labs/templates/networking/3-4-tcp-sock/tcp_sock.c
new file mode 100644
index 00000000000000..d3ea78a1a3e496
--- /dev/null
+++ b/tools/labs/templates/networking/3-4-tcp-sock/tcp_sock.c
@@ -0,0 +1,129 @@
+/*
+ * SO2 - Networking Lab (#10)
+ *
+ * Exercise #3, #4: simple kernel TCP socket
+ *
+ * Code skeleton. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+MODULE_DESCRIPTION("Simple kernel TCP socket");
+MODULE_AUTHOR("SO2");
+MODULE_LICENSE("GPL");
+
+#define LOG_LEVEL KERN_ALERT
+#define MY_TCP_PORT 60000
+#define LISTEN_BACKLOG 5
+
+#define ON 1
+#define OFF 0
+#define DEBUG ON
+
+#if DEBUG == ON
+#define LOG(s) \
+	do { \
+		printk(KERN_DEBUG s "\n"); \
+	} while (0)
+#else
+#define LOG(s) \
+	do {} while (0)
+#endif
+
+/* log the IPv4 address and port held in a struct sockaddr_in */
+#define print_sock_address(addr) \
+	do { \
+		printk(LOG_LEVEL "connection established to " \
+			"%pI4:%d\n", \
+			&addr.sin_addr.s_addr, \
+			ntohs(addr.sin_port)); \
+	} while (0)
+
+static struct socket *sock; /* listening (server) socket */
+static struct socket *new_sock; /* communication socket */
+
+/*
+ * Module init: creates a TCP socket, binds it to loopback:MY_TCP_PORT,
+ * listens on it, accepts one connection into new_sock and logs the peer
+ * address. Returns 0 on success; on failure every socket created so far
+ * is released and the error of the failing step is returned.
+ */
+int __init my_tcp_sock_init(void)
+{
+	int err;
+	/* address to bind on */
+	struct sockaddr_in addr = {
+		.sin_family = AF_INET,
+		.sin_port = htons(MY_TCP_PORT),
+		.sin_addr = { htonl(INADDR_LOOPBACK) }
+	};
+	int addrlen = sizeof(addr);
+	/* address of peer */
+	struct sockaddr_in raddr;
+
+	/* TODO 1/5: create listening socket */
+	err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+	if (err < 0) {
+		printk(LOG_LEVEL "can't create socket\n");
+		goto out;
+	}
+
+	/* TODO 1/5: bind socket to loopback on port MY_TCP_PORT */
+	err = sock->ops->bind(sock, (struct sockaddr *) &addr, addrlen);
+	if (err < 0) {
+		printk(LOG_LEVEL "can't bind socket\n");
+		goto out_release;
+	}
+
+	/* TODO 1/5: start listening */
+	err = sock->ops->listen(sock, LISTEN_BACKLOG);
+	if (err < 0) {
+		printk(LOG_LEVEL "can't listen on socket\n");
+		goto out_release;
+	}
+
+	/* TODO 2/6: create new socket for the accepted connection */
+	err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &new_sock);
+	if (err < 0) {
+		printk(LOG_LEVEL "can't create new socket\n");
+		goto out_release;
+	}
+	new_sock->ops = sock->ops;
+
+	/* TODO 2/5: accept a connection */
+	err = sock->ops->accept(sock, new_sock, 0, true);
+	if (err < 0) {
+		printk(LOG_LEVEL "can't accept new connection\n");
+		goto out_release_new_sock;
+	}
+
+	/* TODO 2/6: get the address of the peer and print it */
+	err = sock->ops->getname(new_sock, (struct sockaddr *) &raddr, 1);
+	if (err < 0) {
+		printk(LOG_LEVEL "can't find peer name\n");
+		goto out_release_new_sock;
+	}
+	print_sock_address(raddr);
+
+	return 0;
+
+out_release_new_sock:
+	/* TODO 2/1: cleanup socket for accepted connection */
+	sock_release(new_sock);
+out_release:
+	/* TODO 1/1: cleanup listening socket */
+	sock_release(sock);
+out:
+	return err;
+}
+
+/* Module exit: releases the accepted socket, then the listening socket. */
+void __exit my_tcp_sock_exit(void)
+{
+	/* TODO 2/1: cleanup socket for accepted connection */
+	sock_release(new_sock);
+
+	/* TODO 1/1: cleanup listening socket */
+	sock_release(sock);
+}
+
+module_init(my_tcp_sock_init);
+module_exit(my_tcp_sock_exit);
diff --git a/tools/labs/templates/networking/3-4-tcp-sock/test-3.sh b/tools/labs/templates/networking/3-4-tcp-sock/test-3.sh
new file mode 100755
index 00000000000000..b3289dbbe584ef
--- /dev/null
+++ b/tools/labs/templates/networking/3-4-tcp-sock/test-3.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+#
+# SO2 - Networking Lab (#10)
+#
+# Test script for exercise #3
+#
+
+set -x
+
+# insert module
+insmod tcp_sock.ko || exit 1
+
+# list all currently listening servers and active connections
+# for both TCP and UDP, and don't resolve hostnames
+netstat -tuan
+
+# remove module
+rmmod tcp_sock || exit 1
diff --git a/tools/labs/templates/networking/3-4-tcp-sock/test-4.sh b/tools/labs/templates/networking/3-4-tcp-sock/test-4.sh
new file mode 100755
index 00000000000000..345d85356070f4
--- /dev/null
+++ b/tools/labs/templates/networking/3-4-tcp-sock/test-4.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+#
+# SO2 - Networking Lab (#10)
+#
+# Test script for exercise #4
+#
+
+set -x
+
+# insert module (run in background, it waits for a connection)
+insmod tcp_sock.ko &
+
+# wait for module to start listening
+sleep 1
+
+# list all currently listening servers and active connections
+# for both 
TCP and UDP, and don't resolve hostnames +netstat -tuan + +# connect to localhost, port 60000, starting a connection using local +# port number 60001; +echo "Should connect." | ../netcat -q 4 127.0.0.1 60000 -p 60001 & + +# wait for connection to be established then remove module +# (and close connection) +sleep 3 + +# remove module +rmmod tcp_sock || exit 1 diff --git a/tools/labs/templates/networking/5-udp-sock/Kbuild b/tools/labs/templates/networking/5-udp-sock/Kbuild new file mode 100644 index 00000000000000..e42a05b84ca634 --- /dev/null +++ b/tools/labs/templates/networking/5-udp-sock/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g + +obj-m = udp_sock.o diff --git a/tools/labs/templates/networking/5-udp-sock/test-5.sh b/tools/labs/templates/networking/5-udp-sock/test-5.sh new file mode 100755 index 00000000000000..9db69a99254e18 --- /dev/null +++ b/tools/labs/templates/networking/5-udp-sock/test-5.sh @@ -0,0 +1,26 @@ +#!/bin/sh +# +# SO2 - Networking Lab (#10) +# +# Test script for bonus exercise +# + +set -x + +# listen for UDP packets on localhost, port 60001 (run in background) +../netcat -l -u -p 60001 & + +# get pid of netcat +pid=$! + +# wait for netcat to start listening +sleep 1 + +# insert module, causing the message to be sent +insmod udp_sock.ko + +# remove module +rmmod udp_sock + +# kill netcat +kill $pid 2>/dev/null diff --git a/tools/labs/templates/networking/5-udp-sock/udp_sock.c b/tools/labs/templates/networking/5-udp-sock/udp_sock.c new file mode 100644 index 00000000000000..0b08f56d973d05 --- /dev/null +++ b/tools/labs/templates/networking/5-udp-sock/udp_sock.c @@ -0,0 +1,130 @@ +/* + * SO2 - Networking Lab (#10) + * + * Bonus: simple kernel UDP socket + * + * Code skeleton. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+MODULE_DESCRIPTION("Simple kernel UDP socket");
+MODULE_AUTHOR("SO2");
+MODULE_LICENSE("GPL");
+
+#define LOG_LEVEL KERN_ALERT
+#define MY_UDP_LOCAL_PORT 60000
+#define MY_UDP_REMOTE_PORT 60001
+#define MY_TEST_MESSAGE "kernelsocket\n"
+
+#define ON 1
+#define OFF 0
+#define DEBUG ON
+
+#if DEBUG == ON
+#define LOG(s) \
+	do { \
+		printk(KERN_DEBUG s "\n"); \
+	} while (0)
+#else
+#define LOG(s) \
+	do {} while (0)
+#endif
+
+/* log the IPv4 address and port held in a struct sockaddr_in */
+#define print_sock_address(addr) \
+	do { \
+		printk(LOG_LEVEL "connection established to " \
+			"%pI4:%d\n", \
+			&addr.sin_addr.s_addr, \
+			ntohs(addr.sin_port)); \
+	} while (0)
+
+static struct socket *sock; /* UDP server */
+
+/*
+ * Send MY_TEST_MESSAGE (including its terminating NUL) as one datagram
+ * from socket s to loopback:MY_UDP_REMOTE_PORT. Returns what
+ * kernel_sendmsg() returns.
+ */
+static int my_udp_msgsend(struct socket *s)
+{
+	/* address to send to */
+	struct sockaddr_in raddr = {
+		.sin_family = AF_INET,
+		.sin_port = htons(MY_UDP_REMOTE_PORT),
+		.sin_addr = { htonl(INADDR_LOOPBACK) }
+	};
+	int raddrlen = sizeof(raddr);
+	/* message; zeroed so no field reaches the socket layer uninitialised */
+	struct msghdr msg = { };
+	/* kernel_sendmsg() takes kernel-space vectors */
+	struct kvec iov;
+	char *buffer = MY_TEST_MESSAGE;
+	int len = strlen(buffer) + 1;
+
+	/* TODO 1/7: build message */
+	iov.iov_base = buffer;
+	iov.iov_len = len;
+	msg.msg_flags = 0;
+	msg.msg_name = &raddr;
+	msg.msg_namelen = raddrlen;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+
+	/* TODO 1/1: send the message down the socket and return the
+	 * error code.
+	 */
+	return kernel_sendmsg(s, &msg, &iov, 1, len);
+
+	/*
+	 * NOTE(review): unreachable as written; presumably kept so the
+	 * function still returns a value when the TODO line above is
+	 * stripped from the skeleton -- confirm before removing.
+	 */
+	return 0;
+}
+
+/*
+ * Module init: creates a UDP socket, binds it to
+ * loopback:MY_UDP_LOCAL_PORT and sends the test message. Returns 0 on
+ * success; on a bind or send failure the socket is released and the error
+ * is returned.
+ */
+int __init my_udp_sock_init(void)
+{
+	int err;
+	/* address to bind on */
+	struct sockaddr_in addr = {
+		.sin_family = AF_INET,
+		.sin_port = htons(MY_UDP_LOCAL_PORT),
+		.sin_addr = { htonl(INADDR_LOOPBACK) }
+	};
+	int addrlen = sizeof(addr);
+
+	/* TODO 1/5: create UDP socket */
+	err = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	if (err < 0) {
+		printk(LOG_LEVEL "can't create socket\n");
+		goto out;
+	}
+
+	/* TODO 1/5: bind socket to loopback on port MY_UDP_LOCAL_PORT */
+	err = sock->ops->bind(sock, (struct sockaddr *) &addr, addrlen);
+	if (err < 0) {
+		printk(LOG_LEVEL "can't bind socket\n");
+		goto out_release;
+	}
+
+	/* send message */
+	err = my_udp_msgsend(sock);
+	if (err < 0) {
+		printk(LOG_LEVEL "can't send message\n");
+		goto out_release;
+	}
+
+	return 0;
+
+out_release:
+	/* TODO 1/1: release socket */
+	sock_release(sock);
+out:
+	return err;
+}
+
+/* Module exit: releases the UDP socket. */
+void __exit my_udp_sock_exit(void)
+{
+	/* TODO 1/1: release socket */
+	sock_release(sock);
+}
+
+module_init(my_udp_sock_init);
+module_exit(my_udp_sock_exit);
diff --git a/tools/labs/templates/networking/netcat b/tools/labs/templates/networking/netcat
new file mode 100755
index 00000000000000..27bf43d64c2191
Binary files /dev/null and b/tools/labs/templates/networking/netcat differ