diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index d761ab10..e9b7e6ef 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -50,6 +50,9 @@ jobs: SENDER_PWD: ${{ secrets.MATRIX_SENDER_PWD }} RECVER_UID: ${{ secrets.MATRIX_RECVER_UID }} RECVER_PWD: ${{ secrets.MATRIX_RECVER_PWD }} + WAYBACK_IPFS_MODE: ${{ vars.WAYBACK_IPFS_MODE }} + WAYBACK_IPFS_HOST: ${{ vars.WAYBACK_IPFS_HOST }} + WAYBACK_IPFS_PORT: ${{ vars.WAYBACK_IPFS_PORT }} steps: - name: Harden Runner uses: step-security/harden-runner@2e205a28d0e1da00c5f53b161f4067b052c61f34 # v1.5.0 @@ -92,7 +95,7 @@ jobs: with: args: -h - - name: Install Packages + - name: Install Packages for Linux if: matrix.os == 'ubuntu-latest' shell: bash run: | @@ -105,7 +108,7 @@ jobs: you-get --version ffmpeg -version - - name: Install Packages + - name: Install Packages for MacOS if: matrix.os == 'macos-latest' shell: bash run: | @@ -115,7 +118,7 @@ jobs: you-get --version ffmpeg -version - - name: Install Packages + - name: Install Packages for Windows if: matrix.os == 'windows-latest' shell: bash run: | @@ -129,6 +132,8 @@ jobs: - name: Set environments shell: bash run: | + ipfsMode="${{ vars.WAYBACK_IPFS_MODE }}" + echo "WAYBACK_IPFS_MODE=${ipfsMode:-daemon}" >> $GITHUB_ENV # Set env to enable reduxer echo "WAYBACK_STORAGE_DIR=${{ runner.temp }}" >> $GITHUB_ENV # Append paths to environment path diff --git a/README.md b/README.md index c1b57931..57fa0553 100644 --- a/README.md +++ b/README.md @@ -225,7 +225,7 @@ You can also specify configuration options either via command flags or via envir | - | `LOG_LEVEL` | `info` | Log level, supported level are `debug`, `info`, `warn`, `error`, `fatal`, defaults to `info` | | - | `ENABLE_METRICS` | `false` | Enable metrics collector | | - | `WAYBACK_LISTEN_ADDR` | `0.0.0.0:8964` | The listen address for the HTTP server | -| - | `CHROME_REMOTE_ADDR` | - | Chrome/Chromium remote debugging address, for screenshot | +| - | `CHROME_REMOTE_ADDR` | - | Chrome/Chromium remote debugging address, for screenshot, format: `host:port`, `wss://domain.tld` | | - | `WAYBACK_POOLING_SIZE` | `3` | Number of worker pool for wayback at once | | - | `WAYBACK_BOLT_PATH` | `./wayback.db` | File path of bolt database | | - | `WAYBACK_STORAGE_DIR` | - | Directory to store binary file, e.g. PDF, html file | diff --git a/cmd/wayback/serve.go b/cmd/wayback/serve.go index a8e8ac46..32ce8a0f 100644 --- a/cmd/wayback/serve.go +++ b/cmd/wayback/serve.go @@ -181,7 +181,7 @@ func (srv *services) run(ctx context.Context, opts service.Options) *services { name: s, }) default: - logger.Error("unrecognize %s in `--daemon`", s) + logger.Fatal("unrecognize %s in `--daemon`", s) } } diff --git a/config/options.go b/config/options.go index 9bf1355a..5fc8d9f5 100644 --- a/config/options.go +++ b/config/options.go @@ -23,7 +23,7 @@ const ( defOverTor = false defIPFSHost = "127.0.0.1" - defIPFSPort = 4001 + defIPFSPort = 5001 defIPFSMode = "pinner" defIPFSTarget = "" defIPFSApikey = "" diff --git a/docs/changelog.md b/docs/changelog.md index 8d836fd7..39d913ad 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -36,6 +36,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - No longer build image for `linux/s390x` - Get rid of the Tor binary ([#336](https://github.com/wabarc/wayback/pull/336)) - Adjusting lux to pluggable mode ([#337](https://github.com/wabarc/wayback/pull/337)) +- Several improvements ([#335](https://github.com/wabarc/wayback/pull/335)) + - Add client timeout for telegra.ph + - Log pooling errors + - Warcraft supports timeout and return waiting error + - Telegra.ph performance improvement + - Upload artifact remotely with timeout + - Remotely file upload with separate function + - Throw a fatal error if the command-line flag value is not specified + - Replace os.Tempdir with testing.T.TempDir + - Replace ioutil.ReadAll with io.ReadAll + - Add storage testing + - Change default ipfs port to 5001 + - Place ipfs related environments for testing + - Wrap testing using t.Run + - Add `chromedp.NoModifyURL` compatibility ### Fixed - Fix semgrep scan workflow ([#312](https://github.com/wabarc/wayback/pull/312)) diff --git a/docs/resources.md b/docs/resources.md index b5ef667a..2335f54a 100644 --- a/docs/resources.md +++ b/docs/resources.md @@ -22,3 +22,8 @@ From the popular Wayback Machine to lesser-known platforms, there is something h ## Wiki - [Web archiving](https://en.wikipedia.org/wiki/Web_archiving) + +## Tools + +- [Browserless](https://www.browserless.io/): Web Automation & Headless Browser Automation Tool. + diff --git a/go.mod b/go.mod index 9eec8505..b37ddb50 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ module github.com/wabarc/wayback go 1.18 require ( - github.com/PuerkitoBio/goquery v1.8.0 + github.com/PuerkitoBio/goquery v1.8.1 github.com/bwmarrin/discordgo v0.23.3-0.20210627161652-421e14965030 github.com/cretz/bine v0.2.0 github.com/davecgh/go-spew v1.1.1 @@ -13,8 +13,8 @@ require ( github.com/dghubble/oauth1 v0.7.1 github.com/dstotijn/go-notion v0.6.1 github.com/dustin/go-humanize v1.0.0 - github.com/fatih/color v1.13.0 - github.com/gabriel-vasile/mimetype v1.4.1 + github.com/fatih/color v1.15.0 + github.com/gabriel-vasile/mimetype v1.4.2 github.com/go-shiori/go-readability v0.0.0-20220215145315-dd6828d2f09b github.com/go-shiori/obelisk v0.0.0-20221119111008-23c015a8fad7 github.com/google/go-github/v40 v40.0.0 @@ -38,15 +38,15 @@ require ( github.com/wabarc/archive.org v1.2.1-0.20210708220121-cb9b83ff9896 github.com/wabarc/go-anonfile v0.1.0 github.com/wabarc/go-catbox v0.1.0 - github.com/wabarc/helper v0.0.0-20230209075818-96584f1ebf9d + github.com/wabarc/helper v0.0.0-20230318095659-969de9ddf4b6 github.com/wabarc/imgbb v1.0.0 github.com/wabarc/ipfs-pinner v1.1.1-0.20220126131044-16299c0dd43d github.com/wabarc/logger v0.0.0-20210730133522-86bd3f31e792 github.com/wabarc/playback v0.0.0-20220715111526-90d0327d3f04 github.com/wabarc/rivet v0.1.4-0.20221226142645-ebc8a29d914f - github.com/wabarc/screenshot v1.6.0 - github.com/wabarc/telegra.ph v0.0.0-20221226141851-edf1cc14c076 - github.com/wabarc/warcraft v0.2.2-0.20211107142816-7beea5a75ab5 + github.com/wabarc/screenshot v1.6.1-0.20230315004517-7587f8bc14e0 + github.com/wabarc/telegra.ph v0.0.0-20230318134541-a0922e1ace3a + github.com/wabarc/warcraft v0.3.1-0.20230308125707-3daa5592ba52 go.etcd.io/bbolt v1.3.6 golang.org/x/net v0.8.0 golang.org/x/sync v0.1.0 @@ -68,8 +68,8 @@ require ( github.com/cenkalti/backoff/v4 v4.2.0 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cheggaaa/pb/v3 v3.0.8 // indirect - github.com/chromedp/cdproto v0.0.0-20221126224343-3a0787b8dd28 // indirect - github.com/chromedp/chromedp v0.8.6 // indirect + github.com/chromedp/cdproto v0.0.0-20230310204135-a6d692f2c96d // indirect + github.com/chromedp/chromedp v0.9.1 // indirect github.com/chromedp/sysutil v1.0.0 // indirect github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3 // indirect github.com/decred/dcrd/crypto/blake256 v1.0.0 // indirect @@ -97,7 +97,7 @@ require ( github.com/itchyny/timefmt-go v0.1.3 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/kallydev/telegraph-go v1.0.0 // indirect + github.com/kallydev/telegraph-go v1.0.1-0.20230318133700-df034d9eed50 // indirect github.com/kennygrant/sanitize v1.2.4 // indirect github.com/kkdai/youtube/v2 v2.7.18 // indirect github.com/klauspost/cpuid/v2 v2.2.2 // indirect diff --git a/go.sum b/go.sum index b54cceaa..7803dd52 100644 --- a/go.sum +++ b/go.sum @@ -39,8 +39,8 @@ github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAE github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= github.com/PuerkitoBio/goquery v1.7.0/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= -github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= -github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= +github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM= +github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ= github.com/SaveTheRbtz/generic-sync-map-go v0.0.0-20220414055132-a37292614db8 h1:Xa6tp8DPDhdV+k23uiTC/GrAYOe4IdyJVKtob4KW3GA= github.com/SaveTheRbtz/generic-sync-map-go v0.0.0-20220414055132-a37292614db8/go.mod h1:ihkm1viTbO/LOsgdGoFPBSvzqvx7ibvkMzYp3CgtHik= github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= @@ -97,10 +97,11 @@ github.com/cheekybits/is v0.0.0-20150225183255-68e9c0620927 h1:SKI1/fuSdodxmNNyV github.com/cheekybits/is v0.0.0-20150225183255-68e9c0620927/go.mod h1:h/aW8ynjgkuj+NQRlZcDbAbM1ORAbXjXX77sX7T289U= github.com/cheggaaa/pb/v3 v3.0.8 h1:bC8oemdChbke2FHIIGy9mn4DPJ2caZYQnfbRqwmdCoA= github.com/cheggaaa/pb/v3 v3.0.8/go.mod h1:UICbiLec/XO6Hw6k+BHEtHeQFzzBH4i2/qk/ow1EJTA= -github.com/chromedp/cdproto v0.0.0-20221126224343-3a0787b8dd28 h1:i4vpMoaMguVwvDc0qSNbCHCRue6d0kbXjj5bDF4fHBA= -github.com/chromedp/cdproto v0.0.0-20221126224343-3a0787b8dd28/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= -github.com/chromedp/chromedp v0.8.6 h1:KobeeqR2dpfKSG1prS3Y6+FbffMmGC6xmAobRXA9QEQ= -github.com/chromedp/chromedp v0.8.6/go.mod h1:nBYHoD6YSNzrr82cIeuOzhw1Jo/s2o0QQ+ifTeoCZ+c= +github.com/chromedp/cdproto v0.0.0-20230220211738-2b1ec77315c9/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= +github.com/chromedp/cdproto v0.0.0-20230310204135-a6d692f2c96d h1:V9DP/zVOBFANcxrhe1aHU1nknxHsn6wv9BEMyd/DQNY= +github.com/chromedp/cdproto v0.0.0-20230310204135-a6d692f2c96d/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= +github.com/chromedp/chromedp v0.9.1 h1:CC7cC5p1BeLiiS2gfNNPwp3OaUxtRMBjfiw3E3k6dFA= +github.com/chromedp/chromedp v0.9.1/go.mod h1:DUgZWRvYoEfgi66CgZ/9Yv+psgi+Sksy5DTScENWjaQ= github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic= github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= @@ -158,13 +159,14 @@ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7 github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= github.com/fatih/color v1.12.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= -github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= +github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= +github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/gabriel-vasile/mimetype v1.4.1 h1:TRWk7se+TOjCYgRth7+1/OYLNiRNIotknkFtf/dnN7Q= -github.com/gabriel-vasile/mimetype v1.4.1/go.mod h1:05Vi0w3Y9c/lNvJOdmIwvrrAhX3rYhfQQCaf9VJcv7M= +github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= +github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -330,8 +332,8 @@ github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1 github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= -github.com/kallydev/telegraph-go v1.0.0 h1:JsIlQfYCY4S5QH20rJYSN6A1eY/vVx07oBVaCsGU/nE= -github.com/kallydev/telegraph-go v1.0.0/go.mod h1:vZj7M9HridntSIuQ7D9hgn2idKiA0T1VIblLp6l9uuc= +github.com/kallydev/telegraph-go v1.0.1-0.20230318133700-df034d9eed50 h1:Os2f/xCaCfWqMAW8hAW8uviDY8XsRqkLUgLFFldSTYw= +github.com/kallydev/telegraph-go v1.0.1-0.20230318133700-df034d9eed50/go.mod h1:vZj7M9HridntSIuQ7D9hgn2idKiA0T1VIblLp6l9uuc= github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= @@ -358,6 +360,7 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo= +github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= github.com/libp2p/go-buffer-pool v0.0.2/go.mod h1:MvaB6xw5vOrDl8rYZGLFdKAuk/hRoRZd1Vi32+RXyFM= github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8= @@ -458,6 +461,7 @@ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw= +github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d h1:U+PMnTlV2tu7RuMK5etusZG3Cf+rpow5hqQByeCzJ2g= github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d/go.mod h1:lXfE4PvvTW5xOjO6Mba8zDPyw8M93B6AQ7frTGnMlA8= @@ -595,8 +599,8 @@ github.com/wabarc/helper v0.0.0-20210614160629-1a5ba5e551eb/go.mod h1:TuTZtoiOu9 github.com/wabarc/helper v0.0.0-20210701193643-e0fe0a807cb9/go.mod h1:TuTZtoiOu984UWOf7FfX58JllKMjq7FCz701kB5W88E= github.com/wabarc/helper v0.0.0-20210718171053-59c70d0b20c2/go.mod h1:uS6mimKlWkGvEZXkJ6JoW7LYnnB2JP6dLU9q7pgDaWQ= github.com/wabarc/helper v0.0.0-20220320101038-dbb2ea36249b/go.mod h1:JGo6GZ+m/xGV63ZiNH0apOrWczV/f8w2JzjoTOg8u58= -github.com/wabarc/helper v0.0.0-20230209075818-96584f1ebf9d h1:TD4Hb2McRvnEkfvzSJxbogf8W+Ra0hfeAc1mV3ouB/A= -github.com/wabarc/helper v0.0.0-20230209075818-96584f1ebf9d/go.mod h1:CuinYj2UtWpv07U+X2y/vSUbvPHF1TT4rg2s23vd6Y0= +github.com/wabarc/helper v0.0.0-20230318095659-969de9ddf4b6 h1:d38Mz85TClBZB+068zx2o7GcZ3yaFgbGXeP1HXDonpQ= +github.com/wabarc/helper v0.0.0-20230318095659-969de9ddf4b6/go.mod h1:eb3rEZVnsfvRAbjKZOTBuLVDtICaDysuscD+MOnPX2M= github.com/wabarc/imgbb v1.0.0 h1:vNud9qY4dRGp1nikb+GHIrc/sE3WOdNq5gfT/vhWEUQ= github.com/wabarc/imgbb v1.0.0/go.mod h1:piz6QXkVfFpQbaQHx70/rPDwaICkW5Ew3lShTzbnVt0= github.com/wabarc/ipfs-pinner v1.1.1-0.20220126131044-16299c0dd43d h1:rSjZNYtr/5ep62nmcAF31v8zUGb0scD3bov/vAzYUpc= @@ -610,12 +614,12 @@ github.com/wabarc/playback v0.0.0-20220715111526-90d0327d3f04 h1:D1XXhpIu2DO0LEv github.com/wabarc/playback v0.0.0-20220715111526-90d0327d3f04/go.mod h1:rxvBzfzci+rMEEsupy1yo58Nbg9KtUmY4WJ67EPzNH8= github.com/wabarc/rivet v0.1.4-0.20221226142645-ebc8a29d914f h1:fkaTPNIalAj8i51BEPb3dPjxrE3ryzYt/+3yzV1u4nM= github.com/wabarc/rivet v0.1.4-0.20221226142645-ebc8a29d914f/go.mod h1:lNEgL14wbyTc+ZNQPM+QtEZcQjB0AifzlWJbRJYQn/A= -github.com/wabarc/screenshot v1.6.0 h1:9yOZY7pGWpDjZ+ms/p5qQVGydhuqfmoAzuKZHdSJ9JU= -github.com/wabarc/screenshot v1.6.0/go.mod h1:At9pHrT/gNDStXkgJgcffBzeeXHDSDQ8ZbnV4D8LsMM= -github.com/wabarc/telegra.ph v0.0.0-20221226141851-edf1cc14c076 h1:ROaObhmSdKVnqhrxYHxE/AURsNjtdEz12TswRLNPQW4= -github.com/wabarc/telegra.ph v0.0.0-20221226141851-edf1cc14c076/go.mod h1:5dfqXIK1Hbdv/4YQtf+S8M1GTwkKSITNhoQmOnfNoRE= -github.com/wabarc/warcraft v0.2.2-0.20211107142816-7beea5a75ab5 h1:jY/jqIy/ddCMWWWuTIeAazE5F4QW8HAIvlI69XMJ1ew= -github.com/wabarc/warcraft v0.2.2-0.20211107142816-7beea5a75ab5/go.mod h1:/BbCwReBjlqHRaw8Yh+7sfAicOesiMYNhiFpuL1x8Rc= +github.com/wabarc/screenshot v1.6.1-0.20230315004517-7587f8bc14e0 h1:H7Ufx43fg3khCS7s39mfHHbpRFoo5GBJS846QhxfClo= +github.com/wabarc/screenshot v1.6.1-0.20230315004517-7587f8bc14e0/go.mod h1:iQ6N8Myjc1XCR7Vh/1ihsWGZQY/vXAMneHwUmKRitxE= +github.com/wabarc/telegra.ph v0.0.0-20230318134541-a0922e1ace3a h1:W1gb0ww1oIARudOfXEMoXkZU1gqk0JERkIvVEgtuzq4= +github.com/wabarc/telegra.ph v0.0.0-20230318134541-a0922e1ace3a/go.mod h1:n+wNWjpzSvzIw8l5JbpeRzJqkOnSsXSgmI45635MUC4= +github.com/wabarc/warcraft v0.3.1-0.20230308125707-3daa5592ba52 h1:ys1ltR7cKqATsLeqXBCp7kPCItIDxwyOjAFj0btH0wQ= +github.com/wabarc/warcraft v0.3.1-0.20230308125707-3daa5592ba52/go.mod h1:/BbCwReBjlqHRaw8Yh+7sfAicOesiMYNhiFpuL1x8Rc= github.com/whyrusleeping/tar-utils v0.0.0-20180509141711-8c6c8ba81d5c/go.mod h1:xxcJeBb7SIUl/Wzkz1eVKJE/CB34YNrqX2TQI6jY9zs= github.com/whyrusleeping/tar-utils v0.0.0-20201201191210-20a61371de5b h1:wA3QeTsaAXybLL2kb2cKhCAQTHgYTMwuI8lBlJSv5V8= github.com/whyrusleeping/tar-utils v0.0.0-20201201191210-20a61371de5b/go.mod h1:xT1Y5p2JR2PfSZihE0s4mjdJaRGp1waCTf5JzhQLBck= @@ -627,6 +631,7 @@ github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= @@ -652,6 +657,7 @@ golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.4.0 h1:UVQgzMY87xqpKNgb+kDsll2Igd33HszWHFLmpaRMq/8= golang.org/x/crypto v0.4.0/go.mod h1:3quD/ATkf6oY+rnes5c3ExXTbLc8mueNue5/DoinL80= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -686,6 +692,7 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -729,7 +736,8 @@ golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20211216030914-fe4d6282115f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -750,6 +758,7 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -813,11 +822,14 @@ golang.org/x/sys v0.0.0-20220227234510-4e6760a101f9/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -825,6 +837,7 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -874,6 +887,7 @@ golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/pooling/pooling.go b/pooling/pooling.go index d51bc0c5..e8602357 100644 --- a/pooling/pooling.go +++ b/pooling/pooling.go @@ -11,6 +11,7 @@ import ( "time" "github.com/phf/go-queue/queue" + "github.com/wabarc/logger" "github.com/wabarc/wayback/errors" ) @@ -109,8 +110,10 @@ func (p *Pool) Roll() { if b, has := p.bucket(); has { go b.once.Do(func() { - // nolint:errcheck - p.do(b) + err := p.do(b) + if err != nil { + logger.Error("pooling do failed: %v", err) + } }) } } diff --git a/publish/github/github_test.go b/publish/github/github_test.go index 252de071..6d892cf2 100644 --- a/publish/github/github_test.go +++ b/publish/github/github_test.go @@ -7,7 +7,7 @@ package github // import "github.com/wabarc/wayback/publish/github" import ( "context" "fmt" - "io/ioutil" + "io" "net/http" "os" "strings" @@ -33,7 +33,7 @@ func TestToIssues(t *testing.T) { w.Header().Set("Content-Type", "application/json") switch r.URL.Path { case "/repos/bar/zoo/issues": - body, _ := ioutil.ReadAll(r.Body) + body, _ := io.ReadAll(r.Body) if !strings.Contains(string(body), config.SlotName(config.SLOT_IA)) { http.Error(w, http.StatusText(http.StatusBadRequest), http.StatusBadRequest) return diff --git a/publish/matrix/matrix_test.go b/publish/matrix/matrix_test.go index 1d44c00c..b43e2a96 100644 --- a/publish/matrix/matrix_test.go +++ b/publish/matrix/matrix_test.go @@ -6,7 +6,7 @@ package matrix // import "github.com/wabarc/wayback/publish/matrix" import ( "fmt" - "io/ioutil" + "io" "net/http" "os" "strings" @@ -36,7 +36,7 @@ func TestToMatrixRoom(t *testing.T) { case r.URL.Path == "/_matrix/client/r0/login", r.URL.Path == "/_matrix/client/v3/login": fmt.Fprintln(w, `{"access_token": "zoo"}`) case strings.Contains(r.URL.Path, "!bar:example.com/send/m.room.message"): - body, _ := ioutil.ReadAll(r.Body) + body, _ := io.ReadAll(r.Body) if !strings.Contains(string(body), config.SlotName(config.SLOT_IA)) { http.Error(w, http.StatusText(http.StatusBadRequest), http.StatusBadRequest) return diff --git a/publish/meili/meili_test.go b/publish/meili/meili_test.go index 2f34a745..7c59685d 100644 --- a/publish/meili/meili_test.go +++ b/publish/meili/meili_test.go @@ -7,7 +7,7 @@ package meili // import "github.com/wabarc/wayback/publish/meili" import ( "encoding/json" "fmt" - "io/ioutil" + "io" "net/http" "os" "strings" @@ -109,7 +109,7 @@ var ( w.WriteHeader(http.StatusAccepted) _, _ = w.Write([]byte(respCreateIndex)) case r.Method == http.MethodPost && r.URL.Path == fmt.Sprintf(`/indexes/%s/documents`, indexing): // add documents - buf, err := ioutil.ReadAll(r.Body) + buf, err := io.ReadAll(r.Body) if err != nil { w.WriteHeader(http.StatusBadRequest) _, _ = w.Write([]byte(respInvalidRequest)) diff --git a/publish/notion/notion_test.go b/publish/notion/notion_test.go index f9c1d9f8..e25b7794 100644 --- a/publish/notion/notion_test.go +++ b/publish/notion/notion_test.go @@ -7,7 +7,7 @@ package notion // import "github.com/wabarc/wayback/publish/notion" import ( "context" "fmt" - "io/ioutil" + "io" "net/http" "os" "strings" @@ -174,7 +174,7 @@ func TestToNotion(t *testing.T) { w.Header().Set("Content-Type", "application/json") switch r.URL.Path { case "/v1/pages": - body, _ := ioutil.ReadAll(r.Body) + body, _ := io.ReadAll(r.Body) if !strings.Contains(string(body), config.SlotName(config.SLOT_IA)) { http.Error(w, http.StatusText(http.StatusBadRequest), http.StatusBadRequest) return diff --git a/reduxer/reduxer.go b/reduxer/reduxer.go index b735b400..85432243 100644 --- a/reduxer/reduxer.go +++ b/reduxer/reduxer.go @@ -9,7 +9,6 @@ import ( "context" "fmt" "io" - "net" "net/http" "net/url" "os" @@ -31,6 +30,8 @@ import ( "golang.org/x/sync/errgroup" ) +const timeout = 30 * time.Second + var ( ctxBasenameKey struct{} @@ -153,10 +154,13 @@ func Do(ctx context.Context, opts *config.Options, urls ...*url.URL) (Reduxer, e } var warc = &warcraft.Warcraft{BasePath: dir, UserAgent: opts.WaybackUserAgent()} - var craft = func(in *url.URL) (path string) { + var craft = func(ctx context.Context, in *url.URL) (path string) { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + path, err = warc.Download(ctx, in) if err != nil { - logger.Debug("create warc for %s failed: %v", in.String(), err) + logger.Debug("create warc for %s failed: %v", in, err) return "" } return path @@ -181,7 +185,7 @@ func Do(ctx context.Context, opts *config.Options, urls ...*url.URL) (Reduxer, e Raw: Asset{Local: fmt.Sprint(shot.HTML)}, PDF: Asset{Local: fmt.Sprint(shot.PDF)}, HAR: Asset{Local: fmt.Sprint(shot.HAR)}, - WARC: Asset{Local: craft(uri)}, + WARC: Asset{Local: craft(ctx, uri)}, } fp := filepath.Join(dir, basename) @@ -200,7 +204,7 @@ func Do(ctx context.Context, opts *config.Options, urls ...*url.URL) (Reduxer, e var article readability.Article buf, err = os.ReadFile(fmt.Sprint(shot.HTML)) if err == nil { - singleFilePath := singleFile(ctx, bytes.NewReader(buf), dir, shot.URL) + singleFilePath := singleFile(ctx, opts, bytes.NewReader(buf), dir, shot.URL) artifact.HTM.Local = singleFilePath } article, err = readability.FromReader(bytes.NewReader(buf), uri) @@ -246,39 +250,36 @@ func capture(ctx context.Context, cfg *config.Options, uri *url.URL, dir string) screenshot.Quality(100), // image quality } - if remote := remoteHeadless(cfg.ChromeRemoteAddr()); remote != nil { - logger.Debug("reduxer using remote browser") - addr := remote.(*net.TCPAddr) - browser, er := screenshot.NewChromeRemoteScreenshoter[screenshot.Path](addr.String()) - if er != nil { - return shot, errors.Wrap(er, "dial screenshoter failed") - } - shot, err = browser.Screenshot(ctx, uri, opts...) - } else { + fallback := func() (*screenshot.Screenshots[screenshot.Path], error) { logger.Debug("reduxer using local browser") shot, err = screenshot.Screenshot[screenshot.Path](ctx, uri, opts...) - } - if err != nil { - if err == context.DeadlineExceeded { - return shot, errors.Wrap(err, "screenshot deadline") + if err != nil { + if err == context.DeadlineExceeded { + return shot, errors.Wrap(err, "screenshot deadline") + } + return shot, errors.Wrap(err, "screenshot error") } - return shot, errors.Wrap(err, "screenshot error") + return shot, err } - return shot, err -} - -func remoteHeadless(addr string) net.Addr { - conn, err := net.DialTimeout("tcp", addr, time.Second) - if err != nil { - return nil + // Try to take a screenshot with a remote headless browser + // Fallback to local browser if remote is unavailable + if remote := cfg.ChromeRemoteAddr(); remote != "" { + logger.Debug("reduxer using remote browser") + browser, er := screenshot.NewChromeRemoteScreenshoter[screenshot.Path](remote) + if er != nil { + logger.Error("screenshot dial failed: %v", er) + return fallback() + } + shot, err = browser.Screenshot(ctx, uri, opts...) + if err != nil { + logger.Error("screenshot failed: %v", err) + return fallback() + } + return shot, nil } - if conn != nil { - conn.Close() - return conn.RemoteAddr() - } - return nil + return fallback() } func createDir(baseDir string) (dir string, err error) { @@ -294,7 +295,7 @@ func createDir(baseDir string) (dir string, err error) { } func remotely(ctx context.Context, artifact *Artifact) (err error) { - v := []*Asset{ + assets := []*Asset{ &artifact.Img, &artifact.PDF, &artifact.Raw, @@ -305,51 +306,57 @@ func remotely(ctx context.Context, artifact *Artifact) (err error) { &artifact.Media, } - c := &http.Client{} + c := &http.Client{Timeout: timeout} cat := catbox.New(c) anon := anonfile.NewAnonfile(c) g, _ := errgroup.WithContext(ctx) + var mu sync.Mutex - for _, asset := range v { + for _, asset := range assets { asset := asset + if asset.Local == "" { + continue + } + if !helper.Exists(asset.Local) { + err = errors.Wrap(err, fmt.Sprintf("local asset: %s not exists", asset.Local)) + continue + } g.Go(func() error { + var remote Remote + func() { + r, e := anon.Upload(asset.Local) + if e != nil { + err = errors.Wrap(err, fmt.Sprintf("upload %s to anonfiles failed: %v", asset.Local, e)) + } else { + remote.Anonfile = r.Short() + } + }() + func() { + c, e := cat.Upload(asset.Local) + if e != nil { + err = errors.Wrap(err, fmt.Sprintf("upload %s to catbox failed: %v", asset.Local, e)) + } else { + remote.Catbox = c + } + }() mu.Lock() - defer mu.Unlock() - - if asset.Local == "" { - return nil - } - if !helper.Exists(asset.Local) { - logger.Debug("local asset: %s not exists", asset.Local) - return nil - } - r, e := anon.Upload(asset.Local) - if e != nil { - err = errors.Wrap(e, fmt.Sprintf("upload %s to anonfiles failed", asset.Local)) - } else { - asset.Remote.Anonfile = r.Short() - } - c, e := cat.Upload(asset.Local) - if e != nil { - err = errors.Wrap(e, fmt.Sprintf("upload %s to catbox failed", asset.Local)) - } else { - asset.Remote.Catbox = c - } - return err + asset.Remote = remote + mu.Unlock() + return nil }) } - if err = g.Wait(); err != nil { - return err - } + // nolint:errcheck + _ = g.Wait() - return nil + return err } -func singleFile(ctx context.Context, inp io.Reader, dir, uri string) string { +func singleFile(ctx context.Context, opts *config.Options, inp io.Reader, dir, uri string) string { req := obelisk.Request{URL: uri, Input: inp} arc := &obelisk.Archiver{ SkipResourceURLError: true, RequestTimeout: 3 * time.Second, + EnableVerboseLog: opts.HasDebugMode(), } arc.Validate() diff --git a/reduxer/reduxer_test.go b/reduxer/reduxer_test.go index b3b634ad..de009c5a 100644 --- a/reduxer/reduxer_test.go +++ b/reduxer/reduxer_test.go @@ -18,6 +18,7 @@ import ( "path/filepath" "strings" "testing" + "time" "github.com/wabarc/helper" "github.com/wabarc/wayback/config" @@ -88,7 +89,7 @@ func TestDo(t *testing.T) { t.Skip("Chrome headless browser no found, skipped") } - dir, err := os.MkdirTemp(os.TempDir(), "reduxer-") + dir, err := os.MkdirTemp(t.TempDir(), "reduxer-") if err != nil { t.Fatalf(`Unexpected create temp dir: %v`, err) } @@ -123,7 +124,7 @@ func TestDo(t *testing.T) { } func TestCreateDir(t *testing.T) { - dir, err := os.MkdirTemp(os.TempDir(), "reduxer-") + dir, err := os.MkdirTemp(t.TempDir(), "reduxer-") if err != nil { t.Fatalf(`Unexpected create temp dir: %v`, err) } @@ -142,7 +143,7 @@ func TestCreateDir(t *testing.T) { } func TestSingleFile(t *testing.T) { - dir, err := os.MkdirTemp(os.TempDir(), "reduxer-") + dir, err := os.MkdirTemp(t.TempDir(), "reduxer-") if err != nil { t.Fatalf(`Unexpected create temp dir: %v`, err) } @@ -157,10 +158,18 @@ func TestSingleFile(t *testing.T) { t.Fatal(`unexpected sample html page`) } + opts, err := config.NewParser().ParseEnvironmentVariables() + if err != nil { + t.Fatalf("Parse environment variables or flags failed, error: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + uri := server.URL filename := helper.RandString(5, "") - ctx := context.WithValue(context.Background(), ctxBasenameKey, filename) - got := singleFile(ctx, strings.NewReader(content), dir, uri) + ctx = context.WithValue(ctx, ctxBasenameKey, filename) + got := singleFile(ctx, opts, strings.NewReader(content), dir, uri) buf, _ := os.ReadFile(got) if !strings.Contains(string(buf), exp) { t.Fatal(`unexpected archive webpage as a single file`) diff --git a/reduxer/sites b/reduxer/sites index a43ff61f..c33deb72 100644 --- a/reduxer/sites +++ b/reduxer/sites @@ -52,7 +52,7 @@ nicovideo.jp pinterest.com pixnet.net pptv.com -qq.com +v.qq.com reddit.com showroom-live.com sina.com.cn diff --git a/service/discord/discord_test.go b/service/discord/discord_test.go index 592034b2..41bb6d0f 100644 --- a/service/discord/discord_test.go +++ b/service/discord/discord_test.go @@ -232,15 +232,7 @@ func TestProcess(t *testing.T) { t.Fatalf("Parse environment variables or flags failed, error: %v", err) } - dir := filepath.Join(os.TempDir(), "wayback") - if !helper.Exists(dir) { - if err := os.Mkdir(dir, 0744); err != nil { - t.Fatal(err) - } - } - defer os.RemoveAll(dir) - - dbpath := filepath.Join(dir, "testing.db") + dbpath := filepath.Join(t.TempDir(), "testing.db") store, err := storage.Open(opts, dbpath) if err != nil { t.Fatalf("open storage failed: %v", err) diff --git a/service/httpd/web_test.go b/service/httpd/web_test.go index 89449627..37309b83 100644 --- a/service/httpd/web_test.go +++ b/service/httpd/web_test.go @@ -7,12 +7,13 @@ package httpd // import "github.com/wabarc/wayback/service/httpd" import ( "context" "encoding/json" - "io/ioutil" + "io" "net/http" "os" "path" "strings" "testing" + "time" "github.com/wabarc/helper" "github.com/wabarc/wayback" @@ -25,7 +26,7 @@ import ( func TestTransform(t *testing.T) { os.Setenv("WAYBACK_ENABLE_IA", "true") - os.Setenv("WAYBACK_STORAGE_DIR", path.Join(os.TempDir(), "reduxer")) + os.Setenv("WAYBACK_STORAGE_DIR", path.Join(t.TempDir(), "reduxer")) parser := config.NewParser() opts, err := parser.ParseEnvironmentVariables() @@ -78,111 +79,124 @@ func TestProcessRespStatus(t *testing.T) { status int method string data string + name string }{ { method: http.MethodGet, status: http.StatusNotModified, data: `{"text":"", "data-type":"json"}`, + name: "without text", }, { method: http.MethodPost, status: http.StatusNotModified, data: `{"text":"foo bar", "data-type":"json"}`, + name: "with text", }, } for _, test := range tests { - req, err := http.NewRequest(test.method, server.URL, strings.NewReader(test.data)) - if err != nil { - t.Fatalf("Unexpected new request: %v", err) - } - - req.Header.Add("Content-Type", "application/json") - resp, err := httpClient.Do(req) - if err != nil { - t.Fatalf("Unexpected response: %v", err) - } - defer resp.Body.Close() - - if resp.StatusCode != test.status { - t.Fatalf("Unexpected response code got %d instead of %d", resp.StatusCode, test.status) - } + t.Run(test.name, func(t *testing.T) { + req, err := http.NewRequest(test.method, server.URL, strings.NewReader(test.data)) + if err != nil { + t.Fatalf("Unexpected new request: %v", err) + } + + req.Header.Add("Content-Type", "application/json") + resp, err := httpClient.Do(req) + if err != nil { + t.Fatalf("Unexpected response: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != test.status { + t.Fatalf("Unexpected response code got %d instead of %d", resp.StatusCode, test.status) + } + }) } } func TestProcessContentType(t *testing.T) { os.Setenv("WAYBACK_ENABLE_IA", "true") - os.Setenv("WAYBACK_STORAGE_DIR", path.Join(os.TempDir(), "reduxer")) - - opts, err := config.NewParser().ParseEnvironmentVariables() - if err != nil { - t.Fatalf("Parse environment variables or flags failed, error: %v", err) - } - - cfg := []pooling.Option{ - pooling.Capacity(opts.PoolingSize()), - pooling.Timeout(opts.WaybackTimeout()), - pooling.MaxRetries(opts.WaybackMaxRetries()), - } - ctx := context.Background() - pool := pooling.New(ctx, cfg...) - go pool.Roll() - defer pool.Close() - - pub := publish.New(ctx, opts) - defer pub.Stop() - - web := newWeb(ctx, opts, pool, pub) - - web.handle() - httpClient, mux, server := helper.MockServer() - defer server.Close() - mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { - web.process(ctx, w, r) - }) var tests = []struct { status int method string contentType string data string + name string }{ { method: http.MethodPost, status: http.StatusOK, contentType: "application/json", data: `text=https%3A%2F%2Fexample.com&data-type=json`, + name: "json", }, { method: http.MethodPost, status: http.StatusOK, contentType: "text/html; charset=utf-8", data: `text=https%3A%2F%2Fexample.com`, + name: "text", }, } for _, test := range tests { - req, err := http.NewRequest(test.method, server.URL, strings.NewReader(test.data)) - if err != nil { - t.Fatalf("Unexpected new request: %v", err) - } - - req.Header.Add("Content-Type", "application/x-www-form-urlencoded") - resp, err := httpClient.Do(req) - if err != nil { - t.Fatalf("Unexpected response: %v", err) - } - defer resp.Body.Close() - - if resp.StatusCode != test.status { - t.Fatalf("Unexpected response code got %d instead of %d", resp.StatusCode, test.status) - } - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - t.Fatalf("Unexpected read body: %v", err) - } - if strings.Index(string(body), config.SlotName(config.SLOT_IA)) == 0 { - t.Fatalf("Unexpected wayback results got %s no containing %q", string(body), config.SlotName(config.SLOT_IA)) - } + t.Run(test.name, func(t *testing.T) { + os.Setenv("WAYBACK_STORAGE_DIR", path.Join(t.TempDir(), "reduxer")) + + opts, err := config.NewParser().ParseEnvironmentVariables() + if err != nil { + t.Fatalf("Parse environment variables or flags failed, error: %v", err) + } + + cfg := []pooling.Option{ + pooling.Capacity(opts.PoolingSize()), + pooling.Timeout(opts.WaybackTimeout()), + pooling.MaxRetries(opts.WaybackMaxRetries()), + } + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + + pool := pooling.New(ctx, cfg...) + go pool.Roll() + defer pool.Close() + + pub := publish.New(ctx, opts) + defer pub.Stop() + + web := newWeb(ctx, opts, pool, pub) + web.handle() + + httpClient, mux, server := helper.MockServer() + defer server.Close() + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + web.process(ctx, w, r) + }) + + req, err := http.NewRequest(test.method, server.URL, strings.NewReader(test.data)) + if err != nil { + t.Fatalf("Unexpected new request: %v", err) + } + + req.Header.Add("Content-Type", "application/x-www-form-urlencoded") + resp, err := httpClient.Do(req) + if err != nil { + t.Fatalf("Unexpected response: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != test.status { + t.Fatalf("Unexpected response code got %d instead of %d", resp.StatusCode, test.status) + } + body, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatalf("Unexpected read body: %v", err) + } + if strings.Index(string(body), config.SlotName(config.SLOT_IA)) == 0 { + t.Fatalf("Unexpected wayback results got %s no containing %q", string(body), config.SlotName(config.SLOT_IA)) + } + }) } } diff --git a/service/slack/slack_test.go b/service/slack/slack_test.go index 7ea3e8f5..ae04e2eb 100644 --- a/service/slack/slack_test.go +++ b/service/slack/slack_test.go @@ -18,7 +18,6 @@ import ( "time" "github.com/gorilla/websocket" - // "github.com/gorilla/websocket" // "github.com/slack-go/slack/slackevents" "github.com/slack-go/slack" "github.com/slack-go/slack/slacktest" diff --git a/storage/storage.go b/storage/storage.go index 558eedec..cf028d10 100644 --- a/storage/storage.go +++ b/storage/storage.go @@ -14,6 +14,8 @@ import ( bolt "go.etcd.io/bbolt" ) +var ErrDatabaseNotFound = errors.New("database not found") + // Storage handles all operations related to the database. type Storage struct { db *bolt.DB @@ -38,7 +40,7 @@ func (s *Storage) Close() error { if s.db != nil { return s.db.Close() } - return errors.New("database not found.") + return ErrDatabaseNotFound } func itob(v int) []byte { diff --git a/storage/storage_test.go b/storage/storage_test.go index 673a3e2c..a86736e7 100644 --- a/storage/storage_test.go +++ b/storage/storage_test.go @@ -6,12 +6,64 @@ package storage // import "github.com/wabarc/wayback/storage" import ( "os" - "path/filepath" + "path" + "testing" - "github.com/wabarc/helper" + "github.com/wabarc/wayback/config" ) -func tmpPath() string { - r := helper.RandString(5, "lower") - return filepath.Join(os.TempDir(), r) +func TestOpen(t *testing.T) { + tests := []struct { + name string + path string + }{ + {"empty path", ""}, + {"exist path", "bolt.db"}, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + file := path.Join(t.TempDir(), test.path) + if test.path == "" { + file = test.path + } + + opts, err := config.NewParser().ParseEnvironmentVariables() + if err != nil { + t.Fatalf("Parse environment variables or flags failed, error: %v", err) + } + defer os.Remove(opts.BoltPathname()) + + s, err := Open(opts, file) + if err != nil { + t.Fatalf("failed to open database: %v", err) + } + defer s.db.Close() + + if s == nil { + t.Fatalf("Storage instance is nil") + } + if s.db == nil { + t.Fatalf("bolt.DB instance is nil") + } + }) + } +} + +func TestClose(t *testing.T) { + file := path.Join(t.TempDir(), "bolt.db") + opts := &config.Options{} + s, err := Open(opts, file) + if err != nil { + t.Fatalf("failed to open database: %v", err) + } + + err = s.Close() + if err != nil { + t.Fatalf("failed to close database: %v", err) + } + + if s.db.String() != `DB<"">` { + t.Fatalf("failed to close database: %s", s.db) + } } diff --git a/storage/telegram_test.go b/storage/telegram_test.go index 9c743373..c85f2c45 100644 --- a/storage/telegram_test.go +++ b/storage/telegram_test.go @@ -5,7 +5,7 @@ package storage // import "github.com/wabarc/wayback/storage" import ( - "os" + "path" "testing" "github.com/wabarc/wayback/config" @@ -13,16 +13,13 @@ import ( ) func TestCreatePlayback(t *testing.T) { - dbpath := tmpPath() - defer os.Remove(dbpath) - parser := config.NewParser() opts, err := parser.ParseEnvironmentVariables() if err != nil { t.Fatalf("Parse environment variables or flags failed, error: %v", err) } - s, err := Open(opts, dbpath) + s, err := Open(opts, path.Join(t.TempDir(), "wayback.db")) if err != nil { t.Fatalf("Unexpected open a bolt db: %v", err) } @@ -36,16 +33,13 @@ func TestCreatePlayback(t *testing.T) { } func TestPlayback(t *testing.T) { - dbpath := tmpPath() - defer os.Remove(dbpath) - parser := config.NewParser() opts, err := parser.ParseEnvironmentVariables() if err != nil { t.Fatalf("Parse environment variables or flags failed, error: %v", err) } - s, err := Open(opts, dbpath) + s, err := Open(opts, path.Join(t.TempDir(), "wayback.db")) if err != nil { t.Fatalf("Unexpected open a bolt db: %v", err) } @@ -71,16 +65,13 @@ func TestPlayback(t *testing.T) { } func TestRemovePlayback(t *testing.T) { - dbpath := tmpPath() - defer os.Remove(dbpath) - parser := config.NewParser() opts, err := parser.ParseEnvironmentVariables() if err != nil { t.Fatalf("Parse environment variables or flags failed, error: %v", err) } - s, err := Open(opts, dbpath) + s, err := Open(opts, path.Join(t.TempDir(), "wayback.db")) if err != nil { t.Fatalf("Unexpected open a bolt db: %v", err) } diff --git a/systemd/systemd_test.go b/systemd/systemd_test.go index 7f6144ef..4040cf01 100644 --- a/systemd/systemd_test.go +++ b/systemd/systemd_test.go @@ -9,18 +9,13 @@ package systemd // import "github.com/wabarc/wayback/systemd" import ( "fmt" - "io/ioutil" "net" "os" "testing" ) func TestSdNotify(t *testing.T) { - testDir, err := ioutil.TempDir("", "test-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(testDir) + testDir := t.TempDir() notifySocket := testDir + "/notify-socket.sock" laddr := net.UnixAddr{ diff --git a/wayback.go b/wayback.go index 33576367..d5e2f769 100644 --- a/wayback.go +++ b/wayback.go @@ -7,9 +7,11 @@ package wayback // import "github.com/wabarc/wayback" import ( "context" "fmt" + "net/http" "net/url" "os" "sync" + "time" "github.com/wabarc/logger" "github.com/wabarc/playback" @@ -27,6 +29,9 @@ import ( pinner "github.com/wabarc/ipfs-pinner" ) +// TODO: find a better way to handle it +var client = &http.Client{Timeout: 30 * time.Second} + // Collect results that archived, Arc is name of the archive service, // Dst mapping the original URL and archived destination URL, // Ext is extra descriptions. @@ -143,7 +148,7 @@ func (i IP) Wayback(rdx reduxer.Reduxer) string { // Wayback implements the standard Waybacker interface: // it reads URL from the PH and returns archived URL as a string. func (i PH) Wayback(rdx reduxer.Reduxer) string { - arc := &ph.Archiver{} + arc := ph.New(client) uri := i.URL.String() ctx := i.ctx