From 71fad0cfd81568c8adb2bdab3555fa6a7c55d017 Mon Sep 17 00:00:00 2001
From: Mo Kweon
Date: Wed, 23 Jun 2021 20:33:04 -0700
Subject: [PATCH] feat: parallelize fetchers and update database (#118)

update database
chore: disable funlen lint
---
 .golangci.yaml                      |     1 +
 metadata-manager/cmd/genmetadata.go |    38 +-
 server/internal/data/database.pbtxt | 31855 +++++++++++++-------------
 3 files changed, 16070 insertions(+), 15824 deletions(-)

diff --git a/.golangci.yaml b/.golangci.yaml
index 48c74380..6099c5bf 100644
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -29,3 +29,4 @@ linters:
     - maligned
     - gomoddirectives
    - wrapcheck
+    - funlen
diff --git a/metadata-manager/cmd/genmetadata.go b/metadata-manager/cmd/genmetadata.go
index d29394b4..b3bbc2dd 100644
--- a/metadata-manager/cmd/genmetadata.go
+++ b/metadata-manager/cmd/genmetadata.go
@@ -22,6 +22,7 @@ const (
 	envNameYouTubeAPIKey   = "YOUTUBE_API_KEY"
 	envNameMappingFile     = "MAPPING_FILE"
 	envNameDatabaseOutFile = "DATABASE_OUT_FILE"
+	envNameWorkers         = "WORKERS"
 )
 
 // genMetaCmd represents the gen-meta command.
@@ -77,6 +78,7 @@ func fetchArxivPapersInfo(paperArxivIDs []string) []*pr12er.Paper {
 	var pr12erPapers []*pr12er.Paper
 
 	for _, arxivID := range paperArxivIDs {
+		log.WithField("arxivID", arxivID).Info("processing a paper")
 		params := paperswithcode_go.PaperListParamsDefault()
 		params.ArxivID = arxivID
 		papers, err := c.PaperList(params)
@@ -122,6 +124,7 @@
 }
 
 func fetchYouTubeVideoInfo(videoID string, apiKey string) *pr12er.YouTubeVideo {
+	log.WithField("videoID", videoID).Info("fetching YouTube video info")
 	// api info: https://developers.google.com/youtube/v3/docs/videos/list
 	// using package: https://pkg.go.dev/google.golang.org/api/youtube/v3
 	// using API example: https://bit.ly/3dfFQPd
@@ -169,11 +172,13 @@ func generateMetadata(cmd *cobra.Command, args []string) error {
 	apiKey := viper.GetString(envNameYouTubeAPIKey)
 	mappingFile := viper.GetString(envNameMappingFile)
 	databaseOutFile := viper.GetString(envNameDatabaseOutFile)
+	workers := viper.GetInt(envNameWorkers)
 
 	log.WithFields(log.Fields{
 		envNameMappingFile:     mappingFile,
 		envNameYouTubeAPIKey:   apiKey,
 		envNameDatabaseOutFile: databaseOutFile,
+		envNameWorkers:         workers,
 	}).WithField(envNameYouTubeAPIKey, apiKey).Info("bind variables")
 
 	// read file and unmarshal mapping file
@@ -191,11 +196,35 @@ func generateMetadata(cmd *cobra.Command, args []string) error {
 	database := &pr12er.Database{
 		PrIdToVideo: make(map[int32]*pr12er.PrVideo),
 	}
-	for _, prRow := range mappingTable.Rows {
-		database.PrIdToVideo[prRow.PrId] = fetchPrVideo(prRow, apiKey)
+
+	in := make(chan *pr12er.MappingTableRow, len(mappingTable.GetRows()))
+	out := make(chan *pr12er.PrVideo, len(mappingTable.GetRows()))
+
+	for w := 0; w < workers; w++ {
+		go func(id int, in <-chan *pr12er.MappingTableRow, out chan<- *pr12er.PrVideo) {
+			for row := range in {
+				out <- fetchPrVideo(row, apiKey)
+			}
+		}(w, in, out)
+	}
+
+	for _, prRow := range mappingTable.GetRows() {
+		in <- prRow
 	}
 
-	bs, err := prototext.Marshal(database)
+	close(in)
+
+	for range mappingTable.GetRows() {
+		prVideo := <-out
+		database.PrIdToVideo[prVideo.GetPrId()] = prVideo
+	}
+
+	close(out)
+
+	bs, err := prototext.MarshalOptions{
+		Multiline: true,
+		Indent:    " ",
+	}.Marshal(database)
 	if err != nil {
 		return err
 	}
@@ -227,4 +256,7 @@ func init() {
 		"../server/internal/data/database.pbtxt", "Filepath to write database.pbtxt")
 	_ = viper.BindPFlag(envNameDatabaseOutFile,
 		genMetaCmd.PersistentFlags().Lookup("database-out-file"))
+
+	genMetaCmd.PersistentFlags().Int("workers", 10, "The number of workers to use for fetching")
+	_ = viper.BindPFlag(envNameWorkers, genMetaCmd.PersistentFlags().Lookup("workers"))
 }
diff --git a/server/internal/data/database.pbtxt b/server/internal/data/database.pbtxt
index 2cc4a9e5..c6586291 100644
--- a/server/internal/data/database.pbtxt
+++ b/server/internal/data/database.pbtxt
@@ -1,15824 +1,16037 @@
-pr_id_to_video: {
-  key: 1
-  value: {
-    papers: {
-      paper_id: "generative-adversarial-networks"
-      title: "Generative Adversarial Networks"
-      arxiv_id: "1406.2661"
-      abstract: "We propose a new framework for estimating generative models via an adversarial process, in which we simultaneously train two models: a generative model G that captures the data distribution, and a discriminative model D that estimates the probability that a sample came from the training data rather than G. The training procedure for G is to maximize the probability of D making a mistake. This framework corresponds to a minimax two-player game. In the space of arbitrary functions G and D, a unique solution exists, with G recovering the training data distribution and D equal to 1/2 everywhere. In the case where G and D are defined by multilayer perceptrons, the entire system can be trained with backpropagation. There is no need for any Markov chains or unrolled approximate inference networks during either training or generation of samples. Experiments demonstrate the potential of the framework through qualitative and quantitative evaluation of the generated samples."
-      pub_date: {
-        seconds: 1402358400
-      }
-      authors: "Ian J. Goodfellow"
-      authors: "Jean Pouget-Abadie"
-      authors: "Mehdi Mirza"
-      authors: "Bing Xu"
-      authors: "David Warde-Farley"
-      authors: "Sherjil Ozair"
-      authors: "Aaron Courville"
-      authors: "Yoshua Bengio"
-      repositories: {
-        url: "https://github.com/jskDr/keraspp_2021"
-        framework: FRAMEWORK_PYTORCH
-      }
-      repositories: {
-        url: "https://github.com/JaryV/CycleGAN_OldYoung"
-        framework: FRAMEWORK_PYTORCH
-        number_of_stars: 1
-      }
-      repositories: {
-        url: "https://github.com/asiltureli/gan-in-colab"
-        framework: FRAMEWORK_PYTORCH
-        description: "GAN implementations on Google Colab"
-      }
-      repositories: {
-        url: "https://github.com/rohitkuk/AnimeGAN"
-        framework: FRAMEWORK_PYTORCH
-        number_of_stars: 17
-        description: "Generating Anime Images by Implementing Deep Convolutional Generative Adversarial Networks paper "
-      }
-      repositories: {
-        url: "https://github.com/ddehueck/pytorch-GAN"
-        framework: FRAMEWORK_PYTORCH
-        number_of_stars: 1
-        description: "PyTorch implementation of the original GAN paper by Goodfellow et al."
-      }
-      repositories: {
-        url: "https://github.com/roberttwomey/machine-imagination-workshop"
-        framework: FRAMEWORK_OTHERS
-        number_of_stars: 2
-        description: "text to image notebook with CLIP for workshop on Machine Imagination, Spring 2021"
-      }
-      repositories: {
-        url: "https://github.com/MaximeVandegar/Papers-in-100-Lines-of-Code"
-        framework: FRAMEWORK_PYTORCH
-        number_of_stars: 11
-        description: "Implementation of papers in 100 lines of code."
- } - repositories: { - url: "https://github.com/dhrim/andong_2021" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - repositories: { - url: "https://github.com/lab-ml/annotated_deep_learning_paper_implementations/tree/master/labml_nn/gan/original" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3053 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." - } - repositories: { - url: "https://github.com/jhKessler/Progressively-Growing-Generative-Adverserial-Network" - framework: FRAMEWORK_PYTORCH - description: "Generative Adverserial Network for Image Generation" - } - methods: { - name: "GAN" - full_name: "Generative Adversarial Network" - description: "A **GAN**, or **Generative Adversarial Network**, is a generative model that simultaneously trains\r\ntwo models: a generative model $G$ that captures the data distribution, and a discriminative model $D$ that estimates the\r\nprobability that a sample came from the training data rather than $G$.\r\n\r\nThe training procedure for $G$ is to maximize the probability of $D$ making\r\na mistake. This framework corresponds to a minimax two-player game. In the\r\nspace of arbitrary functions $G$ and $D$, a unique solution exists, with $G$\r\nrecovering the training data distribution and $D$ equal to $\\frac{1}{2}$\r\neverywhere. In the case where $G$ and $D$ are defined by multilayer perceptrons,\r\nthe entire system can be trained with backpropagation. \r\n\r\n(Image Source: [here](http://www.kdnuggets.com/2017/01/generative-adversarial-networks-hot-topic-machine-learning.html))" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - } - video: { - video_id: "L3hz57whyNw" - video_title: "PR-001: Generative adversarial nets by Jaejun Yoo (2017/4/13)" - number_of_likes: 255 - number_of_views: 34421 - published_date: { - seconds: 1492839397 - } - uploader: "Sung Kim" - } - } -} -pr_id_to_video: { - key: 2 - value: { - papers: { - paper_id: "deformable-convolutional-networks" - title: "Deformable Convolutional Networks" - arxiv_id: "1703.06211" - abstract: "Convolutional neural networks (CNNs) are inherently limited to model\ngeometric transformations due to the fixed geometric structures in its building\nmodules. In this work, we introduce two new modules to enhance the\ntransformation modeling capacity of CNNs, namely, deformable convolution and\ndeformable RoI pooling. Both are based on the idea of augmenting the spatial\nsampling locations in the modules with additional offsets and learning the\noffsets from target tasks, without additional supervision. 
The new modules can\nreadily replace their plain counterparts in existing CNNs and can be easily\ntrained end-to-end by standard back-propagation, giving rise to deformable\nconvolutional networks. Extensive experiments validate the effectiveness of our\napproach on sophisticated vision tasks of object detection and semantic\nsegmentation. The code would be released." - pub_date: { - seconds: 1489708800 - } - authors: "Jifeng Dai" - authors: "Haozhi Qi" - authors: "Yuwen Xiong" - authors: "Yi Li" - authors: "Guodong Zhang" - authors: "Han Hu" - authors: "Yichen Wei" - repositories: { - url: "https://github.com/ximilar-com/xcenternet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 133 - description: "Fast anchor free Object Detection based on CenterNet (Objects As Points) and TTFNet (Training-Time-Friendly Network). Implemented in TensorFlow 2.4+." - } - repositories: { - url: "https://github.com/esw0116/DynaVSR" - framework: FRAMEWORK_PYTORCH - number_of_stars: 40 - description: "DynaVSR: Dynamic Adaptive Blind VideoSuper-Resolution" - } - repositories: { - url: "https://github.com/bkvie/Locally-Consistent-Deformable-Convolution" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Locally Consistent Deformable Convolution as part of deformable flow" - } - repositories: { - url: "https://github.com/zhusiling/EDVR" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - } - repositories: { - url: "https://github.com/TangDL/DCN" - framework: FRAMEWORK_TENSORFLOW - description: "DCN" - } - repositories: { - url: "https://github.com/tianhai123/deform-conv" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/necla-ml/Deformable-ConvNets-py3" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Forked Deformable ConvNets for Python 3" - } - repositories: { - is_official: true - url: "https://github.com/msracver/Deformable-ConvNets" - framework: FRAMEWORK_OTHERS - number_of_stars: 3526 - description: "Deformable Convolutional Networks" - } - repositories: { - url: "https://github.com/NVIDIAAICITYCHALLENGE/AICity_Team6_ISU" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 20 - description: "Source code and code description of Team6_ISU for NVIDIA AICity Challenge 2017 track 1" - } - repositories: { - url: "https://github.com/qilei123/fpn_crop" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. 
Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "Deformable RoI Pooling" - full_name: "Deformable RoI Pooling" - description: "**Deformable RoI Pooling** adds an offset to each bin position in the regular bin partition of the RoI Pooling. Similarly, the offsets are learned from the preceding feature maps and the RoIs, enabling adaptive part localization for objects with different shapes." - } - methods: { - name: "ResNet" - full_name: "Residual Network" - description: "**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. Instead of hoping each few stacked layers directly fit a desired underlying mapping, residual nets let these layers fit a residual mapping. They stack [residual blocks](https://paperswithcode.com/method/residual-block) ontop of each other to form network: e.g. a ResNet-50 has fifty layers using these blocks. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}(x)$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}(x):=\\mathcal{H}(x)-x$. 
The original mapping is recast into $\\mathcal{F}(x)+x$.\r\n\r\nThere is empirical evidence that these types of network are easier to optimize, and can gain accuracy from considerably increased depth." - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Deformable Position-Sensitive RoI Pooling" - full_name: "Deformable Position-Sensitive RoI Pooling" - description: "**Deformable Position-Sensitive RoI Pooling** is similar to PS RoI Pooling but it adds an offset to each bin position in the regular bin partition. Offset learning follows the “fully convolutional” spirit. In the top branch, a convolutional layer generates the full spatial resolution offset fields. For each RoI (also for each class), PS RoI pooling is applied on such fields to obtain normalized offsets, which are then transformed to the real offsets, in the same way as in deformable RoI pooling." - } - methods: { - name: "Deformable Convolution" - full_name: "Deformable Convolution" - description: "**Deformable convolutions** add 2D offsets to the regular grid sampling locations in the standard convolution. It enables free form deformation of the sampling grid. The offsets are learned from the preceding feature maps, via additional convolutional layers. Thus, the deformation is conditioned on the input features in a local, dense, and adaptive manner." 
- } - } - video: { - video_id: "RRwaz0fBQ0Y" - video_title: "PR-002: Deformable Convolutional Networks (2017)" - number_of_likes: 110 - number_of_views: 14404 - published_date: { - seconds: 1492352642 - } - uploader: "Terry TaeWoong Um" - } - } -} -pr_id_to_video: { - key: 3 - value: { - papers: { - paper_id: "learning-phrase-representations-using-rnn" - title: "Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation" - arxiv_id: "1406.1078" - abstract: "In this paper, we propose a novel neural network model called RNN\nEncoder-Decoder that consists of two recurrent neural networks (RNN). One RNN\nencodes a sequence of symbols into a fixed-length vector representation, and\nthe other decodes the representation into another sequence of symbols. The\nencoder and decoder of the proposed model are jointly trained to maximize the\nconditional probability of a target sequence given a source sequence. The\nperformance of a statistical machine translation system is empirically found to\nimprove by using the conditional probabilities of phrase pairs computed by the\nRNN Encoder-Decoder as an additional feature in the existing log-linear model.\nQualitatively, we show that the proposed model learns a semantically and\nsyntactically meaningful representation of linguistic phrases." - pub_date: { - seconds: 1401753600 - } - authors: "Kyunghyun Cho" - authors: "Bart van Merrienboer" - authors: "Caglar Gulcehre" - authors: "Dzmitry Bahdanau" - authors: "Fethi Bougares" - authors: "Holger Schwenk" - authors: "Yoshua Bengio" - repositories: { - url: "https://github.com/roomylee/rnn-text-classification-tf" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 115 - description: "Tensorflow Implementation of Recurrent Neural Network (Vanilla, LSTM, GRU) for Text Classification" - } - repositories: { - url: "https://github.com/dewanderelex/LanguageTranslation" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/magahub/songrnn" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/starry91/NMT-Lab" - framework: FRAMEWORK_OTHERS - description: "Implementation of Neural machine translation papers" - } - repositories: { - url: "https://github.com/munir-bd/Korean-POS-Tagger-LSTM" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/trevor-richardson/rnn_zoo" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9 - description: "This repository tests various recurrent neural network architectures on baseline datasets SeqMNIST and pMNIST." 
- } - repositories: { - url: "https://github.com/Avmb/lowrank-gru" - framework: FRAMEWORK_OTHERS - number_of_stars: 34 - description: "Gated Recurrent Unit with Low-rank matrix factorization" - } - repositories: { - url: "https://github.com/mp2893/gram" - framework: FRAMEWORK_OTHERS - number_of_stars: 197 - description: "Graph-based Attention Model" - } - repositories: { - url: "https://github.com/farizrahman4u/seq2seq" - framework: FRAMEWORK_OTHERS - number_of_stars: 3077 - description: "Sequence to Sequence Learning with Keras" - } - repositories: { - url: "https://github.com/littleflow3r/Sequence_to_sequence_learning_for_machine_translation" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "Pytorch implementation of several seq2seq models (Machine translation task, Japanese-English)" - } - methods: { - name: "GRU" - full_name: "Gated Recurrent Unit" - description: "A **Gated Recurrent Unit**, or **GRU**, is a type of recurrent neural network. It is similar to an [LSTM](https://paperswithcode.com/method/lstm), but only has two gates - a reset gate and an update gate - and notably lacks an output gate. Fewer parameters means GRUs are generally easier/faster to train than their LSTM counterparts.\r\n\r\nImage Source: [here](https://www.google.com/url?sa=i&url=https%3A%2F%2Fcommons.wikimedia.org%2Fwiki%2FFile%3AGated_Recurrent_Unit%2C_type_1.svg&psig=AOvVaw3EmNX8QXC5hvyxeenmJIUn&ust=1590332062671000&source=images&cd=vfe&ved=0CA0QjhxqFwoTCMiev9-eyukCFQAAAAAdAAAAABAR)" - } - } - video: { - video_id: "_Dp8u97_rQ0" - video_title: "PR-003:Learning phrase representations using RNN encoder-decoder for statistical machine translation" - number_of_likes: 34 - number_of_views: 6320 - published_date: { - seconds: 1495764575 - } - uploader: "keun bong Kwak" - } - } -} -pr_id_to_video: { - key: 4 - value: { - papers: { - paper_id: "image-super-resolution-using-deep" - title: "Image Super-Resolution Using Deep Convolutional Networks" - arxiv_id: "1501.00092" - abstract: "We propose a deep learning method for single image super-resolution (SR). Our\nmethod directly learns an end-to-end mapping between the low/high-resolution\nimages. The mapping is represented as a deep convolutional neural network (CNN)\nthat takes the low-resolution image as the input and outputs the\nhigh-resolution one. We further show that traditional sparse-coding-based SR\nmethods can also be viewed as a deep convolutional network. But unlike\ntraditional methods that handle each component separately, our method jointly\noptimizes all layers. Our deep CNN has a lightweight structure, yet\ndemonstrates state-of-the-art restoration quality, and achieves fast speed for\npractical on-line usage. We explore different network structures and parameter\nsettings to achieve trade-offs between performance and speed. Moreover, we\nextend our network to cope with three color channels simultaneously, and show\nbetter overall reconstruction quality." 
- pub_date: { - seconds: 1419984000 - } - authors: "Chao Dong" - authors: "Chen Change Loy" - authors: "Kaiming He" - authors: "Xiaoou Tang" - repositories: { - url: "https://github.com/aba450/Super-Resolution" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/shreeyashyende/better_img_res_with_SRCNN" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - repositories: { - url: "https://github.com/mukul1093/Image-Super-Resolution" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/jaivanti/Super-Resolution-using-ConvNet" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Reconstructing a high resolution photo-realistic image from its counterpart low resolution image has been a long challenging task in the fraternity of computer vision. This task becomes even more difficult when all you have is a single low resolution image as input to recreate its high resolution image. This can be done using Convolution Neural Networks." - } - repositories: { - url: "https://github.com/Amritha16/ImageResolutionEnhancement" - framework: FRAMEWORK_OTHERS - description: "A python implementation of https://arxiv.org/pdf/1501.00092.pdf" - } - repositories: { - url: "https://github.com/amzamzamzamz/nagadomi-waifu2x" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/vpaliwal1/Deep_learning_SRCNN" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/xgd/waifu2xx" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/Weifeng73/Zero-Shot-Super-resolution" - framework: FRAMEWORK_OTHERS - description: "Computer Vision Course 2019 Final Project in ZJU " - } - repositories: { - url: "https://github.com/ferseiti/reproducibility" - framework: FRAMEWORK_TENSORFLOW - } - } - video: { - video_id: "1jGr_OFyfa0" - video_title: "PR-004: Image Super-Resolution Using Deep Convolutional Networks" - number_of_likes: 64 - number_of_views: 9821 - published_date: { - seconds: 1492956744 - } - uploader: "Taegyun Jeon" - } - } -} -pr_id_to_video: { - key: 5 - value: { - papers: { - paper_id: "playing-atari-with-deep-reinforcement" - title: "Playing Atari with Deep Reinforcement Learning" - arxiv_id: "1312.5602" - abstract: "We present the first deep learning model to successfully learn control\npolicies directly from high-dimensional sensory input using reinforcement\nlearning. The model is a convolutional neural network, trained with a variant\nof Q-learning, whose input is raw pixels and whose output is a value function\nestimating future rewards. We apply our method to seven Atari 2600 games from\nthe Arcade Learning Environment, with no adjustment of the architecture or\nlearning algorithm. We find that it outperforms all previous approaches on six\nof the games and surpasses a human expert on three of them." - pub_date: { - seconds: 1387411200 - } - authors: "Volodymyr Mnih" - authors: "Koray Kavukcuoglu" - authors: "David Silver" - authors: "Alex Graves" - authors: "Ioannis Antonoglou" - authors: "Daan Wierstra" - authors: "Martin Riedmiller" - repositories: { - url: "https://github.com/datamllab/rlcard" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1173 - description: "Reinforcement Learning / AI Bots in Card (Poker) Games - Blackjack, Leduc, Texas, DouDizhu, Mahjong, UNO." 
- } - repositories: { - url: "https://github.com/TheFebrin/DeepRL-Pong" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Deep Reinforcement Learning bot playing Pong game." - } - repositories: { - url: "https://github.com/rikluost/RL_DQN_Pong" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Tackling Atari 2600 game Pong with Reinforcement Learning by utilizing DQN and TF-Agents" - } - repositories: { - url: "https://github.com/gordicaleksa/pytorch-learn-reinforcement-learning" - framework: FRAMEWORK_PYTORCH - number_of_stars: 70 - description: "A collection of various RL algorithms like policy gradients, DQN and PPO. The goal of this repo will be to make it a go-to resource for learning about RL. How to visualize, debug and solve RL problems. I've additionally included playground.py for learning more about OpenAI gym, etc." - } - repositories: { - url: "https://github.com/Curt-Park/rainbow-is-all-you-need" - framework: FRAMEWORK_OTHERS - number_of_stars: 1012 - description: "Rainbow is all you need! A step-by-step tutorial from DQN to Rainbow" - } - repositories: { - url: "https://github.com/epignatelli/human-level-control-through-deep-reinforcement-learning" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "A jax/stax implementation of: Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G. and Petersen, S., 2015. Human-level control through deep reinforcement learning. nature, 518(7540), pp.529-533." - } - repositories: { - url: "https://github.com/rishavb123/MineRL" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/eddynelson/dqn" - framework: FRAMEWORK_TENSORFLOW - description: "Deep Q-Networks Implementation with tensorflow 2.x" - } - repositories: { - url: "https://github.com/ktkachuk/Atari-with-Q-Learning" - framework: FRAMEWORK_TENSORFLOW - description: "This notebook shows and explains the implementation of a Reinforcement Learning agent which plays the Atari game Breakout. The agent was trained with Q-Learning." - } - repositories: { - url: "https://github.com/lab-ml/nn" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3055 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "DQN" - full_name: "Deep Q-Network" - description: "A **DQN**, or Deep Q-Network, approximates a state-value function in a [Q-Learning](https://paperswithcode.com/method/q-learning) framework with a neural network. 
In the Atari Games case, they take in several frames of the game as an input and output state values for each action as an output. \r\n\r\nIt is usually used in conjunction with Experience Replay, for storing the episode steps in memory for off-policy learning, where samples are drawn from the replay memory at random. Additionally, the Q-Network is usually optimized towards a frozen target network that is periodically updated with the latest weights every $k$ steps (where $k$ is a hyperparameter). The latter makes training more stable by preventing short-term oscillations from a moving target. The former tackles autocorrelation that would occur from on-line learning, and having a replay memory makes the problem more like a supervised learning problem.\r\n\r\nImage Source: [here](https://www.researchgate.net/publication/319643003_Autonomous_Quadrotor_Landing_using_Deep_Reinforcement_Learning)" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Q-Learning" - full_name: "Q-Learning" - description: "**Q-Learning** is an off-policy temporal difference control algorithm:\r\n\r\n$$Q\\left(S\\_{t}, A\\_{t}\\right) \\leftarrow Q\\left(S\\_{t}, A\\_{t}\\right) + \\alpha\\left[R_{t+1} + \\gamma\\max\\_{a}Q\\left(S\\_{t+1}, a\\right) - Q\\left(S\\_{t}, A\\_{t}\\right)\\right] $$\r\n\r\nThe learned action-value function $Q$ directly approximates $q\\_{*}$, the optimal action-value function, independent of the policy being followed.\r\n\r\nSource: Sutton and Barto, Reinforcement Learning, 2nd Edition" - } - methods: { - name: "Epsilon Greedy Exploration" - full_name: "Epsilon Greedy Exploration" - description: "**$\\epsilon$-Greedy Exploration** is an exploration strategy in reinforcement learning that takes an exploratory action with probability $\\epsilon$ and a greedy action with probability $1-\\epsilon$. It tackles the exploration-exploitation tradeoff with reinforcement learning algorithms: the desire to explore the state space with the desire to seek an optimal policy. Despite its simplicity, it is still commonly used as an behaviour policy $\\pi$ in several state-of-the-art reinforcement learning models.\r\n\r\nImage Credit: [Robin van Embden](https://cran.r-project.org/web/packages/contextual/vignettes/sutton_barto.html)" - } - methods: { - name: "Experience Replay" - full_name: "Experience Replay" - description: "**Experience Replay** is a replay memory technique used in reinforcement learning where we store the agent’s experiences at each time-step, $e\\_{t} = \\left(s\\_{t}, a\\_{t}, r\\_{t}, s\\_{t+1}\\right)$ in a data-set $D = e\\_{1}, \\cdots, e\\_{N}$ , pooled over many episodes into a replay memory. We then usually sample the memory randomly for a minibatch of experience, and use this to learn off-policy, as with Deep Q-Networks. 
This tackles the problem of autocorrelation leading to unstable training, by making the problem more like a supervised learning problem.\r\n\r\nImage Credit: [Hands-On Reinforcement Learning with Python, Sudharsan Ravichandiran](https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781788836524)" - } - } - video: { - video_id: "V7_cNTfm2i8" - video_title: "PR-005: Playing Atari with Deep Reinforcement Learning (NIPS 2013 Deep Learning Workshop)" - number_of_likes: 53 - number_of_views: 8181 - published_date: { - seconds: 1494165820 - } - uploader: "Sung Kim" - } - } -} -pr_id_to_video: { - key: 6 - value: { - papers: { - paper_id: "neural-turing-machines" - title: "Neural Turing Machines" - arxiv_id: "1410.5401" - abstract: "We extend the capabilities of neural networks by coupling them to external\nmemory resources, which they can interact with by attentional processes. The\ncombined system is analogous to a Turing Machine or Von Neumann architecture\nbut is differentiable end-to-end, allowing it to be efficiently trained with\ngradient descent. Preliminary results demonstrate that Neural Turing Machines\ncan infer simple algorithms such as copying, sorting, and associative recall\nfrom input and output examples." - pub_date: { - seconds: 1413763200 - } - authors: "Alex Graves" - authors: "Greg Wayne" - authors: "Ivo Danihelka" - repositories: { - url: "https://github.com/dgedon/lightning-ntm" - framework: FRAMEWORK_PYTORCH - description: "PyTorch Lightning implementation of Neural Turing Machine (NTM)." - } - repositories: { - url: "https://github.com/theneuralbeing/ntm" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "A PyTorch Implementation of Neural Turing Machine" - } - repositories: { - url: "https://github.com/mdabagia/NeuralTuringMachine" - framework: FRAMEWORK_PYTORCH - description: "PyTorch implementation of the neural Turing machine architecture" - } - repositories: { - url: "https://github.com/rs9000/Neural-Turing-machine" - framework: FRAMEWORK_PYTORCH - description: "NTM in PyTorch" - } - repositories: { - url: "https://github.com/shanyaanand/ntm" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/camigord/Neural-Turing-Machine" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 22 - description: "TensorFlow implementation of a Neural Turing Machine" - } - repositories: { - url: "https://github.com/loudinthecloud/pytorch-ntm" - framework: FRAMEWORK_PYTORCH - number_of_stars: 468 - description: "Neural Turing Machines (NTM) - PyTorch Implementation" - } - repositories: { - url: "https://github.com/adityagilra/archibrain" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9 - description: "Synthesize bio-plausible neural networks for cognitive tasks, mimicking brain architecture" - } - repositories: { - url: "https://github.com/MarkPKCollier/NeuralTuringMachine" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 559 - description: "Tensorflow implementation of a Neural Turing Machine" - } - repositories: { - url: "https://github.com/jingweiz/pytorch-dnc" - framework: FRAMEWORK_PYTORCH - number_of_stars: 266 - description: "Neural Turing Machine (NTM) & Differentiable Neural Computer (DNC) with pytorch & visdom" - } - methods: { - name: "Content-based Attention" - full_name: "Content-based Attention" - description: "**Content-based attention** is an attention mechanism based on cosine 
similarity:\r\n\r\n$$f_{att}\\left(\\textbf{h}_{i}, \\textbf{s}\\_{j}\\right) = \\cos\\left[\\textbf{h}\\_{i};\\textbf{s}\\_{j}\\right] $$\r\n\r\nIt was utilised in [Neural Turing Machines](https://paperswithcode.com/method/neural-turing-machine) as part of the Addressing Mechanism.\r\n\r\nWe produce a normalized attention weighting by taking a softmax over these attention alignment scores." - } - methods: { - name: "Tanh Activation" - full_name: "Tanh Activation" - description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" - } - methods: { - name: "LSTM" - full_name: "Long Short-Term Memory" - description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" - } - methods: { - name: "Neural Turing Machine" - full_name: "Neural Turing Machine" - description: "A **Neural Turing Machine** is a working memory neural network model. It couples a neural network architecture with external memory resources. The whole architecture is differentiable end-to-end with gradient descent. The models can infer tasks such as copying, sorting and associative recall.\r\n\r\nA Neural Turing Machine (NTM) architecture contains two basic components: a neural\r\nnetwork controller and a memory bank. The Figure presents a high-level diagram of the NTM\r\narchitecture. Like most neural networks, the controller interacts with the external world via\r\ninput and output vectors. Unlike a standard network, it also interacts with a memory matrix\r\nusing selective read and write operations. By analogy to the Turing machine we refer to the\r\nnetwork outputs that parameterise these operations as “heads.”\r\n\r\nEvery component of the architecture is differentiable. This is achieved by defining 'blurry' read and write operations that interact to a greater or lesser degree with all the elements in memory (rather\r\nthan addressing a single element, as in a normal Turing machine or digital computer). The\r\ndegree of blurriness is determined by an attentional “focus” mechanism that constrains each\r\nread and write operation to interact with a small portion of the memory, while ignoring the\r\nrest. Because interaction with the memory is highly sparse, the NTM is biased towards\r\nstoring data without interference. The memory location brought into attentional focus is\r\ndetermined by specialised outputs emitted by the heads. 
These outputs define a normalised\r\nweighting over the rows in the memory matrix (referred to as memory “locations”). Each\r\nweighting, one per read or write head, defines the degree to which the head reads or writes\r\nat each location. A head can thereby attend sharply to the memory at a single location or\r\nweakly to the memory at many locations" - } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - } - video: { - video_id: "2wbDiZCWQtY" - video_title: "PR-006: Neural Turing Machine" - number_of_likes: 41 - number_of_views: 5053 - published_date: { - seconds: 1494447474 - } - uploader: "Suh Kiho" - } - } -} -pr_id_to_video: { - key: 7 - value: { - papers: { - paper_id: "deep-photo-style-transfer" - title: "Deep Photo Style Transfer" - arxiv_id: "1703.07511" - abstract: "This paper introduces a deep-learning approach to photographic style transfer\nthat handles a large variety of image content while faithfully transferring the\nreference style. Our approach builds upon the recent work on painterly transfer\nthat separates style from the content of an image by considering different\nlayers of a neural network. However, as is, this approach is not suitable for\nphotorealistic style transfer. Even when both the input and reference images\nare photographs, the output still exhibits distortions reminiscent of a\npainting. Our contribution is to constrain the transformation from the input to\nthe output to be locally affine in colorspace, and to express this constraint\nas a custom fully differentiable energy term. We show that this approach\nsuccessfully suppresses distortion and yields satisfying photorealistic style\ntransfers in a broad variety of scenarios, including transfer of the time of\nday, weather, season, and artistic edits." - pub_date: { - seconds: 1490140800 - } - authors: "Fujun Luan" - authors: "Sylvain Paris" - authors: "Eli Shechtman" - authors: "Kavita Bala" - repositories: { - url: "https://github.com/YooJiHyeong/SinIR" - framework: FRAMEWORK_PYTORCH - number_of_stars: 26 - description: "Official implementation of \"SinIR: Efficient General Image Manipulation with Single Image Reconstruction\" (ICML 2021)" - } - repositories: { - url: "https://github.com/EvanLi/Github-Ranking" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 456 - description: ":star:Github Ranking:star: Github stars and forks ranking list. Github Top100 stars list of different languages. Automatically update daily. | Github仓库排名,每日自动更新" - } - repositories: { - url: "https://github.com/EvanLi/github-most-stars-forks" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 457 - description: ":star:Github Ranking:star: Github stars and forks ranking list. Github Top100 stars list of different languages. Automatically update daily. 
| Github仓库排名,每日自动更新" - } - repositories: { - url: "https://github.com/LouieYang/deep-photo-styletransfer-tf" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 772 - description: "Tensorflow (Python API) implementation of Deep Photo Style Transfer" - } - repositories: { - url: "https://github.com/fatihky/starred" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - } - repositories: { - url: "https://github.com/alexanderivanov2424/CSCI-1430-Final-Project" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/ucsd-dsc-arts/dsc160-final-dsc160_final_group4" - framework: FRAMEWORK_TENSORFLOW - description: "dsc160-final-dsc160_final_group4 created by GitHub Classroom" - } - repositories: { - url: "https://github.com/ritesh2212/DeepPhotoStyle_pytorch-master" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/johnsun03/myTest" - framework: FRAMEWORK_OTHERS - description: "one test" - } - repositories: { - url: "https://github.com/muriloime/awesome-stars" - framework: FRAMEWORK_TENSORFLOW - } - } - video: { - video_id: "YF6nLVDlznE" - video_title: "PR-007: Deep Photo Style Transfer" - number_of_likes: 29 - number_of_views: 5720 - published_date: { - seconds: 1494826006 - } - uploader: "Sung Kim" - } - } -} -pr_id_to_video: { - key: 8 - value: { - papers: { - paper_id: "reverse-classification-accuracy-predicting" - title: "Reverse Classification Accuracy: Predicting Segmentation Performance in the Absence of Ground Truth" - arxiv_id: "1702.03407" - abstract: "When integrating computational tools such as automatic segmentation into\nclinical practice, it is of utmost importance to be able to assess the level of\naccuracy on new data, and in particular, to detect when an automatic method\nfails. However, this is difficult to achieve due to absence of ground truth.\nSegmentation accuracy on clinical data might be different from what is found\nthrough cross-validation because validation data is often used during\nincremental method development, which can lead to overfitting and unrealistic\nperformance expectations. Before deployment, performance is quantified using\ndifferent metrics, for which the predicted segmentation is compared to a\nreference segmentation, often obtained manually by an expert. But little is\nknown about the real performance after deployment when a reference is\nunavailable. In this paper, we introduce the concept of reverse classification\naccuracy (RCA) as a framework for predicting the performance of a segmentation\nmethod on new data. In RCA we take the predicted segmentation from a new image\nto train a reverse classifier which is evaluated on a set of reference images\nwith available ground truth. The hypothesis is that if the predicted\nsegmentation is of good quality, then the reverse classifier will perform well\non at least some of the reference images. We validate our approach on\nmulti-organ segmentation with different classifiers and segmentation methods.\nOur results indicate that it is indeed possible to predict the quality of\nindividual segmentations, in the absence of ground truth. Thus, RCA is ideal\nfor integration into automatic processing pipelines in clinical routine and as\npart of large-scale image analysis studies." - pub_date: { - seconds: 1486771200 - } - authors: "Vanya V. Valindria" - authors: "Ioannis Lavdas" - authors: "Wenjia Bai" - authors: "Konstantinos Kamnitsas" - authors: "Eric O. 
Aboagye" - authors: "Andrea G. Rockall" - authors: "Daniel Rueckert" - authors: "Ben Glocker" - } - video: { - video_id: "jbnjzyJDldA" - } - } -} -pr_id_to_video: { - key: 9 - value: { - papers: { - paper_id: "distilling-the-knowledge-in-a-neural-network" - title: "Distilling the Knowledge in a Neural Network" - arxiv_id: "1503.02531" - abstract: "A very simple way to improve the performance of almost any machine learning\nalgorithm is to train many different models on the same data and then to\naverage their predictions. Unfortunately, making predictions using a whole\nensemble of models is cumbersome and may be too computationally expensive to\nallow deployment to a large number of users, especially if the individual\nmodels are large neural nets. Caruana and his collaborators have shown that it\nis possible to compress the knowledge in an ensemble into a single model which\nis much easier to deploy and we develop this approach further using a different\ncompression technique. We achieve some surprising results on MNIST and we show\nthat we can significantly improve the acoustic model of a heavily used\ncommercial system by distilling the knowledge in an ensemble of models into a\nsingle model. We also introduce a new type of ensemble composed of one or more\nfull models and many specialist models which learn to distinguish fine-grained\nclasses that the full models confuse. Unlike a mixture of experts, these\nspecialist models can be trained rapidly and in parallel." - pub_date: { - seconds: 1425859200 - } - authors: "Geoffrey Hinton" - authors: "Oriol Vinyals" - authors: "Jeff Dean" - repositories: { - url: "https://github.com/jaychoi12/LG_KD" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "LG paper review QnA session - Knowledge Distillation" - } - repositories: { - url: "https://github.com/yoshitomo-matsubara/torchdistill" - framework: FRAMEWORK_PYTORCH - number_of_stars: 310 - description: "PyTorch-based modular, configuration-driven framework for knowledge distillation. 🏆18 methods presented at CVPR, ICLR, ECCV, NeurIPS, ICCV, etc are implemented so far. 🎁 Trained models, training logs and configurations are available for ensuring the reproducibiliy." - } - repositories: { - url: "https://github.com/franknb/Text-Summarization" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "An experimental repo for testing effective text summarization tools." 
- } - repositories: { - url: "https://github.com/TakieddineSOUALHI/Transfer_learning" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - repositories: { - url: "https://github.com/millenialSpirou/ift6010" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/MasLiang/Learning-without-Forgetting-using-Pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 8 - description: "This is the Pytorch implementation of LwF" - } - repositories: { - url: "https://github.com/KaiyuYue/mgd" - framework: FRAMEWORK_PYTORCH - number_of_stars: 37 - description: "Matching Guided Distillation (ECCV 2020)" - } - repositories: { - url: "https://github.com/see--/speech_recognition" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 19 - description: "This repo contains my part of the code for our winning entry in the TensorFlow Speech Recognition Challenge hosted by kaggle" - } - repositories: { - url: "https://github.com/jpmcd/TensorFlow-KnowledgeDistillation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "Knowledge Distillation with CIFAR10" - } - repositories: { - url: "https://github.com/KellyYutongHe/Knowledge-Distillation-Net-with-Swish" - framework: FRAMEWORK_PYTORCH - } - } - video: { - video_id: "tOItokBZSfU" - video_title: "PR-009: Distilling the Knowledge in a Neural Network (Slide: English, Speaking: Korean)" - number_of_likes: 43 - number_of_views: 6456 - published_date: { - seconds: 1495514577 - } - uploader: "Choung young jae" - } - } -} -pr_id_to_video: { - key: 10 - value: { - papers: { - paper_id: "auto-encoding-variational-bayes" - title: "Auto-Encoding Variational Bayes" - arxiv_id: "1312.6114" - abstract: "How can we perform efficient inference and learning in directed probabilistic\nmodels, in the presence of continuous latent variables with intractable\nposterior distributions, and large datasets? We introduce a stochastic\nvariational inference and learning algorithm that scales to large datasets and,\nunder some mild differentiability conditions, even works in the intractable\ncase. Our contributions is two-fold. First, we show that a reparameterization\nof the variational lower bound yields a lower bound estimator that can be\nstraightforwardly optimized using standard stochastic gradient methods. Second,\nwe show that for i.i.d. datasets with continuous latent variables per\ndatapoint, posterior inference can be made especially efficient by fitting an\napproximate inference model (also called a recognition model) to the\nintractable posterior using the proposed lower bound estimator. Theoretical\nadvantages are reflected in experimental results." 
- pub_date: { - seconds: 1387497600 - } - authors: "Diederik P Kingma" - authors: "Max Welling" - repositories: { - url: "https://github.com/ngiann/ApproximateVI.jl" - framework: FRAMEWORK_OTHERS - description: "Approximate variational inference in Julia" - } - repositories: { - url: "https://github.com/nghorbani/human_body_prior" - framework: FRAMEWORK_PYTORCH - number_of_stars: 290 - description: "VPoser: Variational Human Pose Prior" - } - repositories: { - url: "https://github.com/lanzhang128/disentanglement" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - } - repositories: { - url: "https://github.com/carbonati/variational-zoo" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 15 - description: "Variational inference and disentangled representations through unsupervised learning" - } - repositories: { - url: "https://github.com/tonystevenj/vae-celeba-pytorch-lightning" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Valinna VAE implemented in pytorch-lightning, trained through Celeba dataset" - } - repositories: { - url: "https://github.com/leokster/CVAE" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/selimseker/logogram-language-generator" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - } - repositories: { - url: "https://github.com/shinshoji01/Style-Restricted_GAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "This repository is to introduce our model, Style-Restricted GAN." - } - repositories: { - url: "https://github.com/EugenHotaj/pytorch-generative/blob/master/pytorch_generative/models/vae/vae.py" - framework: FRAMEWORK_PYTORCH - number_of_stars: 142 - description: "Easy generative modeling in PyTorch." - } - repositories: { - url: "https://github.com/chandu-97/BayesByBackprop" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "My implementation of Bayes by Backprop(MLP)" - } - methods: { - name: "VAE" - full_name: "Variational Autoencoder" - description: "A **Variational Autoencoder** is a type of likelihood-based generative model. It consists of an encoder, that takes in data $x$ as input and transforms this into a latent representation $z$, and a decoder, that takes a latent representation $z$ and returns a reconstruction $\\hat{x}$. Inference is performed via variational inference to approximate the posterior of the model." - } - methods: { - name: "Stochastic Gradient Variational Bayes" - full_name: "Stochastic Gradient Variational Bayes" - } - } - video: { - video_id: "KYA-GEhObIs" - video_title: "PR-010: Auto-Encoding Variational Bayes, ICLR 2014" - number_of_likes: 203 - number_of_views: 12143 - published_date: { - seconds: 1495549847 - } - uploader: "차준범" - } - } -} -pr_id_to_video: { - key: 11 - value: { - papers: { - paper_id: "spatial-transformer-networks" - title: "Spatial Transformer Networks" - arxiv_id: "1506.02025" - abstract: "Convolutional Neural Networks define an exceptionally powerful class of\nmodels, but are still limited by the lack of ability to be spatially invariant\nto the input data in a computationally and parameter efficient manner. In this\nwork we introduce a new learnable module, the Spatial Transformer, which\nexplicitly allows the spatial manipulation of data within the network. 
This\ndifferentiable module can be inserted into existing convolutional\narchitectures, giving neural networks the ability to actively spatially\ntransform feature maps, conditional on the feature map itself, without any\nextra training supervision or modification to the optimisation process. We show\nthat the use of spatial transformers results in models which learn invariance\nto translation, scale, rotation and more generic warping, resulting in\nstate-of-the-art performance on several benchmarks, and for a number of classes\nof transformations." - pub_date: { - seconds: 1433462400 - } - authors: "Max Jaderberg" - authors: "Karen Simonyan" - authors: "Andrew Zisserman" - authors: "Koray Kavukcuoglu" - repositories: { - url: "https://github.com/dabane-ghassan/int-lab-book" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "\"Foveated Spatial Transformers\", benchmarking Spatial Transformer Networks against a bio-inspired artificial vision model." - } - repositories: { - url: "https://github.com/vinod377/STN-OCR" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Implementation of \"STN-OCR: A single Neural Network for Text Detection and Text Recognition\" in natural Scenes by Christian Bartz." - } - repositories: { - url: "https://github.com/sayakpaul/Spatial-Transformer-Networks-with-Keras" - framework: FRAMEWORK_OTHERS - number_of_stars: 15 - description: "This repository provides a Colab Notebook that shows how to use Spatial Transformer Networks inside CNNs build in Keras." - } - repositories: { - url: "https://github.com/TencentYoutuResearch/SelfSupervisedLearning-DSM" - framework: FRAMEWORK_PYTORCH - number_of_stars: 22 - description: "code for AAAI21 paper \"Enhancing Unsupervised Video Representation Learning by Decoupling the Scene and the Motion“" - } - repositories: { - url: "https://github.com/dedhiaparth98/spatial-transformer-network" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "Spatial Transformer Network (STN) provides attention to a particular region to in an image, by doing transformation to the input image. The code in this repository does Affine transformation to image, but other transformation can be explored." - } - repositories: { - url: "https://github.com/chenwuperth/rgz_rcnn" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 36 - description: "ClaRAN: A deep learning classifier for radio morphologies" - } - repositories: { - url: "https://github.com/FingerRec/DSM" - framework: FRAMEWORK_OTHERS - number_of_stars: 43 - description: "[AAAI2021] The source code for our paper 《Enhancing Unsupervised Video Representation Learning by Decoupling the Scene and the Motion》." - } - repositories: { - url: "https://github.com/tianyu-tristan/Visual-Attention-Model" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 41 - } - repositories: { - url: "https://github.com/elisiojsj/Kuzushiji-49" - framework: FRAMEWORK_PYTORCH - description: "Classifier for Kuzushiji (Japanese calligraphy) characters." - } - repositories: { - url: "https://github.com/Mugilvanan/stnbhwd" - framework: FRAMEWORK_OTHERS - } - methods: { - name: "Spatial Transformer" - full_name: "Spatial Transformer" - description: "A **Spatial Transformer** is an image model block that explicitly allows the spatial manipulation of data within a convolutional neural network. 
It gives CNNs the ability to actively spatially transform feature maps, conditional on the feature map itself, without any extra training supervision or modification to the optimisation process. Unlike pooling layers, where the receptive fields are fixed and local, the spatial transformer module is a dynamic mechanism that can actively spatially transform an image (or a feature map) by producing an appropriate transformation for each input sample. The transformation is then performed on the entire feature map (non-locally) and can include scaling, cropping, rotations, as well as non-rigid deformations.\r\n\r\nThe architecture is shown in the Figure to the right. The input feature map $U$ is passed to a localisation network which regresses the transformation parameters $\\theta$. The regular spatial grid $G$ over $V$ is transformed to the sampling grid $T\\_{\\theta}\\left(G\\right)$, which is applied to $U$, producing the warped output feature map $V$. The combination of the localisation network and sampling mechanism defines a spatial transformer." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "SGD" - full_name: "Stochastic Gradient Descent" - description: "**Stochastic Gradient Descent** is an iterative optimization technique that uses minibatches of data to form an expectation of the gradient, rather than the full gradient using all available data. That is for weights $w$ and a loss function $L$ we have:\r\n\r\n$$ w\\_{t+1} = w\\_{t} - \\eta\\hat{\\nabla}\\_{w}{L(w\\_{t})} $$\r\n\r\nWhere $\\eta$ is a learning rate. SGD reduces redundancy compared to batch gradient descent - which recomputes gradients for similar examples before each parameter update - so it is usually much faster.\r\n\r\n(Image Source: [here](http://rasbt.github.io/mlxtend/user_guide/general_concepts/gradient-optimization/))" - } - } - video: { - video_id: "Rv3osRZWGbg" - video_title: "PR-011: Spatial Transformer Networks" - number_of_likes: 45 - number_of_views: 5441 - published_date: { - seconds: 1495978512 - } - uploader: "Jiyang Kang" - } - } -} -pr_id_to_video: { - key: 12 - value: { - papers: { - paper_id: "faster-r-cnn-towards-real-time-object" - title: "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" - arxiv_id: "1506.01497" - abstract: "State-of-the-art object detection networks depend on region proposal\nalgorithms to hypothesize object locations. Advances like SPPnet and Fast R-CNN\nhave reduced the running time of these detection networks, exposing region\nproposal computation as a bottleneck. In this work, we introduce a Region\nProposal Network (RPN) that shares full-image convolutional features with the\ndetection network, thus enabling nearly cost-free region proposals. An RPN is a\nfully convolutional network that simultaneously predicts object bounds and\nobjectness scores at each position. 
The RPN is trained end-to-end to generate\nhigh-quality region proposals, which are used by Fast R-CNN for detection. We\nfurther merge RPN and Fast R-CNN into a single network by sharing their\nconvolutional features---using the recently popular terminology of neural\nnetworks with 'attention' mechanisms, the RPN component tells the unified\nnetwork where to look. For the very deep VGG-16 model, our detection system has\na frame rate of 5fps (including all steps) on a GPU, while achieving\nstate-of-the-art object detection accuracy on PASCAL VOC 2007, 2012, and MS\nCOCO datasets with only 300 proposals per image. In ILSVRC and COCO 2015\ncompetitions, Faster R-CNN and RPN are the foundations of the 1st-place winning\nentries in several tracks. Code has been made publicly available." - pub_date: { - seconds: 1433376000 - } - authors: "Shaoqing Ren" - authors: "Kaiming He" - authors: "Ross Girshick" - authors: "Jian Sun" - repositories: { - url: "https://github.com/miaohua1982/simple_fasterrcnn_pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/JeffCHEN2017/WSSTG" - framework: FRAMEWORK_PYTORCH - number_of_stars: 40 - description: "This repository contains the main baselines introduced in WSSTG (ACL 2019)." - } - repositories: { - url: "https://github.com/VDIGPKU/OPANAS" - framework: FRAMEWORK_PYTORCH - number_of_stars: 14 - description: "The official code for OPANAS: One-Shot Path Aggregation Network Architecture Search for Object Detection" - } - repositories: { - url: "https://github.com/KostadinovShalon/UAVDetectionTrackingBenchmark" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - } - repositories: { - url: "https://github.com/potterhsu/easy-faster-rcnn.pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 147 - description: "An easy implementation of Faster R-CNN (https://arxiv.org/pdf/1506.01497.pdf) in PyTorch." - } - repositories: { - url: "https://github.com/zhudelong/elevator_button_recognition" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 16 - description: "Button recognition for autonomous elevator operation" - } - repositories: { - url: "https://github.com/EmGarr/kerod" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 34 - description: "DETR - Faster RCNN implementation in tensorflow 2" - } - repositories: { - url: "https://github.com/liangheming/faster_rcnnv1" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9 - description: "pytorch implement of fasterRCNN,736px(max side),39.4mAP(COCO),30.21fps(RTX 2080TI)" - } - repositories: { - url: "https://github.com/chenwuperth/ClaRAN" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "ClaRAN - Classifying Radio Galaxies Automatically with Neural Networks" - } - repositories: { - url: "https://github.com/AlphaJia/pytorch-faster-rcnn" - framework: FRAMEWORK_PYTORCH - number_of_stars: 292 - description: "pytorch based implementation faster rcnn" - } - methods: { - name: "RPN" - full_name: "Region Proposal Network" - description: "A **Region Proposal Network**, or **RPN**, is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals. 
RPN and algorithms like Fast R-CNN can be merged into a single network by sharing their convolutional features - using the recently popular terminology of neural networks with attention mechanisms, the RPN component tells the unified network where to look.\r\n\r\nRPNs are designed to efficiently predict region proposals with a wide range of scales and aspect ratios. RPNs use anchor boxes that serve as references at multiple scales and aspect ratios. The scheme can be thought of as a pyramid of regression references, which avoids enumerating images or filters of multiple scales or aspect ratios." - } - methods: { - name: "Fast R-CNN" - full_name: "Fast R-CNN" - description: "**Fast R-CNN** is an object detection model that improves in its predecessor [R-CNN](https://paperswithcode.com/method/r-cnn) in a number of ways. Instead of extracting CNN features independently for each region of interest, Fast R-CNN aggregates them into a single forward pass over the image; i.e. regions of interest from the same image share computation and memory in the forward and backward passes." - } - methods: { - name: "RoIPool" - full_name: "RoIPool" - description: "**Region of Interest Pooling**, or **RoIPool**, is an operation for extracting a small feature map (e.g., $7×7$) from each RoI in detection and segmentation based tasks. Features are extracted from each candidate box, and thereafter in models like Fast R-CNN, are then classified and bounding box regression performed.\r\n\r\nThe actual scaling to, e.g., $7×7$, occurs by dividing the region proposal into equally sized sections, finding the largest value in each section, and then copying these max values to the output buffer. In essence, **RoIPool** is max pooling on a discrete grid based on a box.\r\n\r\nImage Source: [Joyce Xu](https://towardsdatascience.com/deep-learning-for-object-detection-a-comprehensive-review-73930816d8d9)" - } - methods: { - name: "VGG-16" - full_name: "VGG-16" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Faster R-CNN" - full_name: "Faster R-CNN" - description: "**Faster R-CNN** is an object detection model that improves on [Fast R-CNN](https://paperswithcode.com/method/fast-r-cnn) by utilising a region proposal network ([RPN](https://paperswithcode.com/method/rpn)) with the CNN model. The RPN shares full-image convolutional features with the detection network, enabling nearly cost-free region proposals. It is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. 
The RPN is trained end-to-end to generate high-quality region proposals, which are used by [Fast R-CNN](https://paperswithcode.com/method/fast-r-cnn) for detection. RPN and Fast R-CNN are merged into a single network by sharing their convolutional features: the RPN component tells the unified network where to look.\r\n\r\nAs a whole, Faster R-CNN consists of two modules. The first module is a deep fully convolutional network that proposes regions, and the second module is the Fast R-CNN detector that uses the proposed regions." - } - } - video: { - video_id: "kcPAGIgBGRs" - video_title: "PR-012: Faster R-CNN : Towards Real-Time Object Detection with Region Proposal Networks" - number_of_likes: 387 - number_of_views: 48654 - published_date: { - seconds: 1495981094 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 13 - value: { - papers: { - paper_id: "domain-adversarial-training-of-neural" - title: "Domain-Adversarial Training of Neural Networks" - arxiv_id: "1505.07818" - abstract: "We introduce a new representation learning approach for domain adaptation, in\nwhich data at training and test time come from similar but different\ndistributions. Our approach is directly inspired by the theory on domain\nadaptation suggesting that, for effective domain transfer to be achieved,\npredictions must be made based on features that cannot discriminate between the\ntraining (source) and test (target) domains. The approach implements this idea\nin the context of neural network architectures that are trained on labeled data\nfrom the source domain and unlabeled data from the target domain (no labeled\ntarget-domain data is necessary). As the training progresses, the approach\npromotes the emergence of features that are (i) discriminative for the main\nlearning task on the source domain and (ii) indiscriminate with respect to the\nshift between the domains. We show that this adaptation behaviour can be\nachieved in almost any feed-forward model by augmenting it with few standard\nlayers and a new gradient reversal layer. The resulting augmented architecture\ncan be trained using standard backpropagation and stochastic gradient descent,\nand can thus be implemented with little effort using any of the deep learning\npackages. We demonstrate the success of our approach for two distinct\nclassification problems (document sentiment analysis and image classification),\nwhere state-of-the-art domain adaptation performance on standard benchmarks is\nachieved. We also validate the approach for descriptor learning task in the\ncontext of person re-identification application." - pub_date: { - seconds: 1432771200 - } - authors: "Yaroslav Ganin" - authors: "Evgeniya Ustinova" - authors: "Hana Ajakan" - authors: "Pascal Germain" - authors: "Hugo Larochelle" - authors: "François Laviolette" - authors: "Mario Marchand" - authors: "Victor Lempitsky" - repositories: { - url: "https://github.com/criteo-research/pytorch-ada" - framework: FRAMEWORK_PYTORCH - number_of_stars: 53 - description: "Another Domain Adaptation library, aimed at researchers." 
- } - repositories: { - url: "https://github.com/rpryzant/proxy-a-distance" - framework: FRAMEWORK_OTHERS - number_of_stars: 29 - description: "Proxy A-Distance algorithm for measuring domain disparity in parallel corpora" - } - repositories: { - url: "https://github.com/JorisRoels/domain-adaptive-segmentation" - framework: FRAMEWORK_PYTORCH - number_of_stars: 15 - description: "Domain adaptation segmentation for volume EM imaging" - } - repositories: { - url: "https://github.com/facebookresearch/DomainBed" - framework: FRAMEWORK_PYTORCH - number_of_stars: 339 - description: "DomainBed is a suite to test domain generalization algorithms" - } - repositories: { - url: "https://github.com/monkey0head/Domain_Adaptation_thesis" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Source code for master thesis on Unsupervised Domain Adaptation for Image Processing " - } - repositories: { - url: "https://github.com/dv-fenix/Domain-Adaptation" - framework: FRAMEWORK_PYTORCH - description: "PyTorch implementations of some papers on Domain Adaptation" - } - repositories: { - url: "https://github.com/Nadavc220/DomainAdversarialTrainingOfNeuralNetworks" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "This is a Pytorch implementation of the 2014 paper named Domain Adversarial Training of Neural Networks " - } - repositories: { - url: "https://github.com/asahi417/DeepDomainAdaptation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18 - description: "Tensorflow deep learning based domain adaptation model implementations with experiment of estimate MNIST by SVHN data (SVHN -> MNIST): DANN (domain-adversarial neural network), Deep JDOT (joint distribution optimal transportation)" - } - repositories: { - url: "https://github.com/ShichengChen/WaveNetSeparateAudio" - framework: FRAMEWORK_PYTORCH - number_of_stars: 44 - description: "WaveNet for the separation of audio sources" - } - repositories: { - url: "https://github.com/scpark20/universal-music-translation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 26 - description: "A Universal Music Translation Network Implementation" - } - } - video: { - video_id: "n2J7giHrS-Y" - video_title: "PR-013: Domain Adversarial Training of Neural Network" - number_of_likes: 51 - number_of_views: 5877 - published_date: { - seconds: 1496675287 - } - uploader: "Jaejun Yoo" - } - } -} -pr_id_to_video: { - key: 14 - value: { - papers: { - paper_id: "on-human-motion-prediction-using-recurrent" - title: "On human motion prediction using recurrent neural networks" - arxiv_id: "1705.02445" - abstract: "Human motion modelling is a classical problem at the intersection of graphics\nand computer vision, with applications spanning human-computer interaction,\nmotion synthesis, and motion prediction for virtual and augmented reality.\nFollowing the success of deep learning methods in several computer vision\ntasks, recent work has focused on using deep recurrent neural networks (RNNs)\nto model human motion, with the goal of learning time-dependent representations\nthat perform tasks such as short-term motion prediction and long-term human\nmotion synthesis. We examine recent work, with a focus on the evaluation\nmethodologies commonly used in the literature, and show that, surprisingly,\nstate-of-the-art performance can be achieved by a simple baseline that does not\nattempt to model motion at all. 
We investigate this result, and analyze recent\nRNN methods by looking at the architectures, loss functions, and training\nprocedures used in state-of-the-art approaches. We propose three changes to the\nstandard RNN models typically used for human motion, which result in a simple\nand scalable RNN architecture that obtains state-of-the-art performance on\nhuman motion prediction." - pub_date: { - seconds: 1494028800 - } - authors: "Julieta Martinez" - authors: "Michael J. Black" - authors: "Javier Romero" - repositories: { - url: "https://github.com/nageshpindi/human-motion-prediction-master" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/YQRickWang/tf" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/facebookresearch/QuaterNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 602 - description: "Proposes neural networks that can generate animation of virtual characters for different actions." - } - repositories: { - is_official: true - url: "https://github.com/una-dinosauria/human-motion-prediction" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 302 - description: "Simple baselines and RNNs for predicting human motion in tensorflow. Presented at CVPR 17." - } - repositories: { - url: "https://github.com/garroud/human-motion-prediction-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 12 - description: "Pytorch implementation of human motion prediction" - } - repositories: { - url: "https://github.com/YQRickWang/Motion_Prediction" - framework: FRAMEWORK_PYTORCH - description: "Motion Prediciton on deepfly" - } - } - video: { - video_id: "Y1loN3Sc4Dk" - video_title: "PR-014: On Human Motion Prediction using RNNs (2017)" - number_of_likes: 53 - number_of_views: 5112 - published_date: { - seconds: 1496611967 - } - uploader: "Terry TaeWoong Um" - } - } -} -pr_id_to_video: { - key: 15 - value: { - papers: { - paper_id: "convolutional-neural-networks-for-sentence" - title: "Convolutional Neural Networks for Sentence Classification" - arxiv_id: "1408.5882" - abstract: "We report on a series of experiments with convolutional neural networks (CNN)\ntrained on top of pre-trained word vectors for sentence-level classification\ntasks. We show that a simple CNN with little hyperparameter tuning and static\nvectors achieves excellent results on multiple benchmarks. Learning\ntask-specific vectors through fine-tuning offers further gains in performance.\nWe additionally propose a simple modification to the architecture to allow for\nthe use of both task-specific and static vectors. The CNN models discussed\nherein improve upon the state of the art on 4 out of 7 tasks, which include\nsentiment analysis and question classification." 
- pub_date: { - seconds: 1408924800 - } - authors: "Yoon Kim" - repositories: { - url: "https://github.com/GayeonKim-data/section4-project" - framework: FRAMEWORK_OTHERS - description: "딥러닝을 활용한 영화 리뷰 속 스포일러 탐지 프로젝트" - } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleRec/tree/release/2.1.0/models/contentunderstanding/textcnn" - framework: FRAMEWORK_OTHERS - number_of_stars: 527 - description: "大规模推荐模型训练工具" - } - repositories: { - url: "https://github.com/guanliu321/CNN-RNN-HAN-for-Text-Classification-Using-NLP" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "It’s a NLP Problem,the goal of our project is to classify categories of news based on the content of news articles from the BBC website using CNN, RNN and HAN models on two datasets that the former dataset have 2225 news, 5 categories and the latter dataset have 18846 news, 20 categories. Set hyperparameters, such as embedding dimensions of glove model, trainable parameter of embedding layer, bidirectional LSTM or simple LSTM Preprocess the news articles, including removing punctuation ,stopwords, lemmatization,removing outliers in terms of news length and the number of sentences and set the corresponding parameters Tokenize the data using word-index which is fit on the train data,then generate 2D input data (article, word) for CNN and RNN algorithms,and then generate 3D input data (article, sentence, word) for HAN algorithm Use set hyperparameters to build the model architecture and use checkpointing, early stopping to train model, and then compare the test accuracy and validation loss of these three models Utilized:Python,Pandas,Numpy,Seaborn,Matplolib,NLP,DNN,CNN,RNN,HAN,LSTM,GPU,Text Classification,Hyperparameters Tuning" - } - repositories: { - url: "https://github.com/dongjun-Lee/text-classification-models-tf" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 486 - description: "Tensorflow implementations of Text Classification Models." - } - repositories: { - url: "https://github.com/yinghao1019/NLP_and_DL_practice/blob/master/Convolution_Neural_Netowrks_for_sentence_classification_Practice.ipynb" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "This repository is used for NLP Model practice and learning" - } - repositories: { - url: "https://github.com/yinghao1019/NLP_and_DL_practice" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "This repository is used for NLP Model practice and learning" - } - repositories: { - url: "https://github.com/chiemenz/automl_vs_hyperdrive" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/yschoi-nisp/AI-Grand-Challenge-2020" - framework: FRAMEWORK_PYTORCH - number_of_stars: 24 - description: "AI grand challenge 2020 Repo (Speech Recognition Track)" - } - repositories: { - url: "https://github.com/prakashpandey9/Text-Classification-Pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 710 - description: "Text classification using deep learning models in Pytorch" - } - repositories: { - url: "https://github.com/HuihuiChyan/BJTUNLP_Practice2021" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "This is the third version of the practices for the rookies of BJTUNLPers." 
- } - } - video: { - video_id: "IRB2vXSet2E" - video_title: "PR-015:Convolutional Neural Networks for Sentence Classification" - number_of_likes: 49 - number_of_views: 5781 - published_date: { - seconds: 1497187460 - } - uploader: "keun bong Kwak" - } - } -} -pr_id_to_video: { - key: 16 - value: { - papers: { - paper_id: "you-only-look-once-unified-real-time-object" - title: "You Only Look Once: Unified, Real-Time Object Detection" - arxiv_id: "1506.02640" - abstract: "We present YOLO, a new approach to object detection. Prior work on object\ndetection repurposes classifiers to perform detection. Instead, we frame object\ndetection as a regression problem to spatially separated bounding boxes and\nassociated class probabilities. A single neural network predicts bounding boxes\nand class probabilities directly from full images in one evaluation. Since the\nwhole detection pipeline is a single network, it can be optimized end-to-end\ndirectly on detection performance.\n Our unified architecture is extremely fast. Our base YOLO model processes\nimages in real-time at 45 frames per second. A smaller version of the network,\nFast YOLO, processes an astounding 155 frames per second while still achieving\ndouble the mAP of other real-time detectors. Compared to state-of-the-art\ndetection systems, YOLO makes more localization errors but is far less likely\nto predict false detections where nothing exists. Finally, YOLO learns very\ngeneral representations of objects. It outperforms all other detection methods,\nincluding DPM and R-CNN, by a wide margin when generalizing from natural images\nto artwork on both the Picasso Dataset and the People-Art Dataset." - pub_date: { - seconds: 1433721600 - } - authors: "Joseph Redmon" - authors: "Santosh Divvala" - authors: "Ross Girshick" - authors: "Ali Farhadi" - repositories: { - url: "https://github.com/DevBruce/YOLOv1-TF2" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "YOLOv1 implementation with TensorFlow2" - } - repositories: { - url: "https://github.com/msuhail1997/YOLO-Pytorch-Object_Detection" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/jalotra/Queue-Detection" - framework: FRAMEWORK_PYTORCH - description: "A naive Algorithm that uses People Detection and Convex Hull as subroutines to solve this problem: \"Given an image of people standing in a queue, how many people are standing in some queue{Q}.\"" - } - repositories: { - url: "https://github.com/jalotra/Queue-Detection-" - framework: FRAMEWORK_PYTORCH - description: "A naive Algorithm that uses People Detection and Convex Hull as subroutines to solve this problem: \"Given an image of people standing in a queue, how many people are standing in some queue{Q}.\"" - } - repositories: { - url: "https://github.com/TeamML-2021/knowledge-base" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/object_detection/you_only_look_once" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "ai codebase" - } - repositories: { - url: "https://github.com/hamidriasat/Computer-Vision-and-Deep-Learning" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/ritesh2448/Text-Detection-And-Recognition" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/MINED30/Face_Mask_Detection_YOLO" - framework: FRAMEWORK_PYTORCH - 
number_of_stars: 1 - } - repositories: { - url: "https://github.com/Qengineering/YoloV3-ncnn-Raspberry-Pi-4" - framework: FRAMEWORK_OTHERS - number_of_stars: 21 - description: "MobileNetV2_YOLOV3 for ncnn framework" - } - methods: { - name: "Non Maximum Suppression" - full_name: "Non Maximum Suppression" - description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criteria is usually discarding entities that are below a given probability bound. With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where a $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. 
Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - methods: { - name: "Step Decay" - full_name: "Step Decay" - description: "**Step Decay** is a learning rate schedule that drops the learning rate by a factor every few epochs, where the number of epochs is a hyperparameter.\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" - } - methods: { - name: "Random Resized Crop" - full_name: "Random Resized Crop" - description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "SGD with Momentum" - full_name: "SGD with Momentum" - description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" - } - } - video: { - video_id: "eTDcoeqj1_w" - video_title: "PR-016: You only look once: Unified, real-time object detection" - number_of_likes: 99 - number_of_views: 16120 - published_date: { - seconds: 1497795435 - } - uploader: "Taegyun Jeon" - } - } -} -pr_id_to_video: { - key: 17 - value: { - papers: { - paper_id: "neural-architecture-search-with-reinforcement" - title: "Neural Architecture Search with Reinforcement Learning" - arxiv_id: "1611.01578" - abstract: "Neural networks are powerful and flexible models that work well for many\ndifficult learning tasks in image, speech and natural language understanding.\nDespite their success, neural networks are still hard to design. In this paper,\nwe use a recurrent network to generate the model descriptions of neural\nnetworks and train this RNN with reinforcement learning to maximize the\nexpected accuracy of the generated architectures on a validation set. 
On the\nCIFAR-10 dataset, our method, starting from scratch, can design a novel network\narchitecture that rivals the best human-invented architecture in terms of test\nset accuracy. Our CIFAR-10 model achieves a test error rate of 3.65, which is\n0.09 percent better and 1.05x faster than the previous state-of-the-art model\nthat used a similar architectural scheme. On the Penn Treebank dataset, our\nmodel can compose a novel recurrent cell that outperforms the widely-used LSTM\ncell, and other state-of-the-art baselines. Our cell achieves a test set\nperplexity of 62.4 on the Penn Treebank, which is 3.6 perplexity better than\nthe previous state-of-the-art model. The cell can also be transferred to the\ncharacter language modeling task on PTB and achieves a state-of-the-art\nperplexity of 1.214." - pub_date: { - seconds: 1478304000 - } - authors: "Barret Zoph" - authors: "Quoc V. Le" - repositories: { - url: "https://github.com/abcp4/DAPytorch" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/YaCpotato/deepaugmentFix" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/TreeLimes/QANAS" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/cshannonn/blackscholes_nas" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "Can a neural network learn Black Scholes, yes..." - } - repositories: { - url: "https://github.com/YaCpotato/B4ResearchDeepaugment" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - repositories: { - url: "https://github.com/GiuliaLanzillotta/INAS" - framework: FRAMEWORK_PYTORCH - description: "Infinite Neural Architecture Search" - } - repositories: { - url: "https://github.com/carpedm20/ENAS-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2444 - description: "PyTorch implementation of \"Efficient Neural Architecture Search via Parameters Sharing\"" - } - repositories: { - is_official: true - url: "https://github.com/tensorflow/models" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70333 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/barisozmen/deepaugment" - framework: FRAMEWORK_OTHERS - number_of_stars: 192 - description: "Discover augmentation strategies tailored for your dataset" - } - repositories: { - url: "https://github.com/DataCanvasIO/Hypernets" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 94 - description: "A General Automated Machine Learning framework to simplify the development of End-to-end AutoML toolkits in specific domains." - } - } - video: { - video_id: "XP3vyVrrt3Q" - video_title: "PR-017: Neural Architecture Search with Reinforcement Learning" - number_of_likes: 31 - number_of_views: 3950 - published_date: { - seconds: 1497796191 - } - uploader: "Suh Kiho" - } - } -} -pr_id_to_video: { - key: 18 - value: { - papers: { - paper_id: "a-simple-neural-network-module-for-relational" - title: "A simple neural network module for relational reasoning" - arxiv_id: "1706.01427" - abstract: "Relational reasoning is a central component of generally intelligent\nbehavior, but has proven difficult for neural networks to learn. In this paper\nwe describe how to use Relation Networks (RNs) as a simple plug-and-play module\nto solve problems that fundamentally hinge on relational reasoning. 
We tested\nRN-augmented networks on three tasks: visual question answering using a\nchallenging dataset called CLEVR, on which we achieve state-of-the-art,\nsuper-human performance; text-based question answering using the bAbI suite of\ntasks; and complex reasoning about dynamic physical systems. Then, using a\ncurated dataset called Sort-of-CLEVR we show that powerful convolutional\nnetworks do not have a general capacity to solve relational questions, but can\ngain this capacity when augmented with RNs. Our work shows how a deep learning\narchitecture equipped with an RN module can implicitly discover and learn to\nreason about entities and their relations." - pub_date: { - seconds: 1496620800 - } - authors: "Adam Santoro" - authors: "David Raposo" - authors: "David G. T. Barrett" - authors: "Mateusz Malinowski" - authors: "Razvan Pascanu" - authors: "Peter Battaglia" - authors: "Timothy Lillicrap" - repositories: { - url: "https://github.com/jaehyunnn/RelationalNetwork_pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "An un-official implementation of Relational Network [A. Santoro et al., 2017] (PyTorch) " - } - repositories: { - url: "https://github.com/ttok0s7u2n5/ML2_proj" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/moduIo/Relation-Networks" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "Keras implementation of Relation Networks for Visual Question Answering using the CLEVR dataset." - } - repositories: { - url: "https://github.com/matwilso/relation-networks" - framework: FRAMEWORK_TENSORFLOW - description: "Messing around with Relation Networks and other stuff for state embedding" - } - repositories: { - url: "https://github.com/adriangoe/relational-networks-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "A pytorch implementation of Relational Networks by Santoro et al (https://arxiv.org/abs/1706.01427)" - } - repositories: { - url: "https://github.com/mesnico/RelationNetworks-CLEVR" - framework: FRAMEWORK_PYTORCH - number_of_stars: 74 - description: "A pytorch implementation for \"A simple neural network module for relational reasoning\", working on the CLEVR dataset" - } - repositories: { - url: "https://github.com/fcorencoret/dynamic-rn" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/IllgamhoDuck/ResTR" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Aesthetic quality assessment Artificial Intelligence based on relation between elements / 요소 간 관계를 기반으로 미적 수준을 판별하는 인공지능 / 2018.11.21 기준 AVA dataset에서 State of the Art result" - } - repositories: { - url: "https://github.com/gitlimlab/Relation-Network-Tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 326 - description: "Tensorflow implementations of Relational Networks and a VQA dataset named Sort-of-CLEVR proposed by DeepMind." - } - repositories: { - url: "https://github.com/mdda/relationships-from-entity-stream" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: " Research presented at the NIPs 2017 ViGiL Workshop." 
- } - } - video: { - video_id: "Lb1PVpFp9F8" - video_title: "PR-018: A Simple Neural Network Module for Relational Reasoning (DeepMind)" - number_of_likes: 63 - number_of_views: 6766 - published_date: { - seconds: 1498432650 - } - uploader: "Sung Kim" - } - } -} -pr_id_to_video: { - key: 19 - value: { - papers: { - paper_id: "continuous-control-with-deep-reinforcement" - title: "Continuous control with deep reinforcement learning" - arxiv_id: "1509.02971" - abstract: "We adapt the ideas underlying the success of Deep Q-Learning to the continuous action domain. We present an actor-critic, model-free algorithm based on the deterministic policy gradient that can operate over continuous action spaces. Using the same learning algorithm, network architecture and hyper-parameters, our algorithm robustly solves more than 20 simulated physics tasks, including classic problems such as cartpole swing-up, dexterous manipulation, legged locomotion and car driving. Our algorithm is able to find policies whose performance is competitive with those found by a planning algorithm with full access to the dynamics of the domain and its derivatives. We further demonstrate that for many of the tasks the algorithm can learn policies end-to-end: directly from raw pixel inputs." - pub_date: { - seconds: 1441756800 - } - authors: "Timothy P. Lillicrap" - authors: "Jonathan J. Hunt" - authors: "Alexander Pritzel" - authors: "Nicolas Heess" - authors: "Tom Erez" - authors: "Yuval Tassa" - authors: "David Silver" - authors: "Daan Wierstra" - repositories: { - url: "https://github.com/Brook1711/RIS_components" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "components of RIS simulations" - } - repositories: { - url: "https://github.com/rikluost/RL_DQN_Pong" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Tackling Atari 2600 game Pong with Reinforcement Learning by utilizing DQN and TF-Agents" - } - repositories: { - url: "https://github.com/Medabid1/RL_Project" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "My Deep learning project : Training a robot in MuJoCo with RL" - } - repositories: { - url: "https://github.com/flavioschneider/ml_papers_presentations" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - repositories: { - url: "https://github.com/KelvinYang0320/deepbots-panda" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Panda with Deep Reinforcement Learning Simulation Environment Webots" - } - repositories: { - url: "https://github.com/wpiszlogin/driver_critic" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Solution for CarRacing-v0 environment from OpenAI Gym. It uses the Deep Deterministic Policy Gradient algorithm." - } - repositories: { - url: "https://github.com/backgom2357/Recommender_system_via_deep_RL" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "The implemetation of Deep Reinforcement Learning based Recommender System from the paper Deep Reinforcement Learning based Recommendation with Explicit User-Item Interactions Modeling by Liu et al." - } - repositories: { - url: "https://github.com/SarodYatawatta/smart-calibration" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Deep reinforcement learning for smart calibration of radio telescopes. Automatic hyper-parameter tuning." 
- } - repositories: { - url: "https://github.com/dchetelat/acer" - framework: FRAMEWORK_PYTORCH - number_of_stars: 17 - description: "PyTorch implementation of both discrete and continuous ACER" - } - repositories: { - url: "https://github.com/DanielLSM/safe-rl-tutorial" - framework: FRAMEWORK_TENSORFLOW - description: "Just a mini tutorial on safe rl" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Q-Learning" - full_name: "Q-Learning" - description: "**Q-Learning** is an off-policy temporal difference control algorithm:\r\n\r\n$$Q\\left(S\\_{t}, A\\_{t}\\right) \\leftarrow Q\\left(S\\_{t}, A\\_{t}\\right) + \\alpha\\left[R_{t+1} + \\gamma\\max\\_{a}Q\\left(S\\_{t+1}, a\\right) - Q\\left(S\\_{t}, A\\_{t}\\right)\\right] $$\r\n\r\nThe learned action-value function $Q$ directly approximates $q\\_{*}$, the optimal action-value function, independent of the policy being followed.\r\n\r\nSource: Sutton and Barto, Reinforcement Learning, 2nd Edition" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "DDPG" - full_name: "Deep Deterministic Policy Gradient" - description: "**DDPG**, or **Deep Deterministic Policy Gradient**, is an actor-critic, model-free algorithm based on the deterministic policy gradient that can operate over continuous action spaces. 
It combines the actor-critic approach with insights from [DQNs](https://paperswithcode.com/method/dqn): in particular, the insights that 1) the network is trained off-policy with samples from a replay buffer to minimize correlations between samples, and 2) the network is trained with a target Q network to give consistent targets during temporal difference backups. DDPG makes use of the same ideas along with batch normalization." - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." 
- } - methods: { - name: "Experience Replay" - full_name: "Experience Replay" - description: "**Experience Replay** is a replay memory technique used in reinforcement learning where we store the agent’s experiences at each time-step, $e\\_{t} = \\left(s\\_{t}, a\\_{t}, r\\_{t}, s\\_{t+1}\\right)$ in a data-set $D = e\\_{1}, \\cdots, e\\_{N}$ , pooled over many episodes into a replay memory. We then usually sample the memory randomly for a minibatch of experience, and use this to learn off-policy, as with Deep Q-Networks. This tackles the problem of autocorrelation leading to unstable training, by making the problem more like a supervised learning problem.\r\n\r\nImage Credit: [Hands-On Reinforcement Learning with Python, Sudharsan Ravichandiran](https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781788836524)" - } - } - video: { - video_id: "h2WSVBAC1t4" - video_title: "PR-019: Continuous Control with Deep Reinforcement Learning" - number_of_likes: 52 - number_of_views: 5328 - published_date: { - seconds: 1498452479 - } - uploader: "Seungil Kim" - } - } -} -pr_id_to_video: { - key: 20 - value: { - papers: { - paper_id: "delving-deep-into-rectifiers-surpassing-human" - title: "Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification" - arxiv_id: "1502.01852" - abstract: "Rectified activation units (rectifiers) are essential for state-of-the-art\nneural networks. In this work, we study rectifier neural networks for image\nclassification from two aspects. First, we propose a Parametric Rectified\nLinear Unit (PReLU) that generalizes the traditional rectified unit. PReLU\nimproves model fitting with nearly zero extra computational cost and little\noverfitting risk. Second, we derive a robust initialization method that\nparticularly considers the rectifier nonlinearities. This method enables us to\ntrain extremely deep rectified models directly from scratch and to investigate\ndeeper or wider network architectures. Based on our PReLU networks\n(PReLU-nets), we achieve 4.94% top-5 test error on the ImageNet 2012\nclassification dataset. This is a 26% relative improvement over the ILSVRC 2014\nwinner (GoogLeNet, 6.66%). To our knowledge, our result is the first to surpass\nhuman-level performance (5.1%, Russakovsky et al.) on this visual recognition\nchallenge." - pub_date: { - seconds: 1423180800 - } - authors: "Kaiming He" - authors: "Xiangyu Zhang" - authors: "Shaoqing Ren" - authors: "Jian Sun" - repositories: { - url: "https://github.com/phogbinh/handwritten-digit-recognition" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/ihsuy/Train-by-Reconnect" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "Official code for the NeurIPS 2020 paper Train by Reconnect: Decoupling Locations of Weights from Their Values by Yushi Qiu and Reiji Suda." - } - repositories: { - url: "https://github.com/AnzorGozalishvili/autoencoders_playground" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Playing with several types of autoencoders with supervised, unsupervised and semi-supervised learning objectives." 
- } - repositories: { - url: "https://github.com/hamiddimyati/dd2424-deep-learning" - framework: FRAMEWORK_OTHERS - description: "All codes and reports for assignments of deep learning course" - } - repositories: { - url: "https://github.com/krish-pinninti/api-ann-python" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/LiamLau1/MLDE" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/MrtnMndt/Rethinking_CNN_Layerwise_Feature_Amounts" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "PyTorch implementation of our paper \"Rethinking Layer-wise Feature Amounts in Convolutional Neural Network Architectures\"" - } - repositories: { - url: "https://github.com/patconrey/ANN-Example" - framework: FRAMEWORK_TENSORFLOW - description: "This is an example script to create, train, and evaluate an artificial neural network." - } - repositories: { - url: "https://github.com/dmbernaal/Daedalus" - framework: FRAMEWORK_PYTORCH - number_of_stars: 13 - description: "Deep Learning Research " - } - repositories: { - url: "https://github.com/LFhase/Research_Navigation" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "Recordings of my research navigation, including paper/book reading notes and related implementations" - } - methods: { - name: "PReLU" - full_name: "Parameterized ReLU" - description: "A **Parametric Rectified Linear Unit**, or **PReLU**, is an activation function that generalizes the traditional rectified unit with a slope for negative values. Formally:\r\n\r\n$$f\\left(y\\_{i}\\right) = y\\_{i} \\text{ if } y\\_{i} \\ge 0$$\r\n$$f\\left(y\\_{i}\\right) = a\\_{i}y\\_{i} \\text{ if } y\\_{i} \\leq 0$$\r\n\r\nThe intuition is that different layers may require different types of nonlinearity. Indeed the authors find in experiments with convolutional neural networks that PReLus for the initial layer have more positive slopes, i.e. closer to linear. Since the filters of the first layers are Gabor-like filters such as edge or texture detectors, this shows a circumstance where positive and negative responses of filters are respected. In contrast the authors find deeper layers have smaller coefficients, suggesting the model becomes more discriminative at later layers (while it wants to retain more information at earlier layers)." - } - methods: { - name: "PReLU-Net" - full_name: "PReLU-Net" - description: "**PReLU-Net** is a type of convolutional neural network that utilises parameterized ReLUs for its activation function. It also uses a robust initialization scheme - afterwards known as [Kaiming Initialization](https://paperswithcode.com/method/he-initialization) - that accounts for non-linear activation functions." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "Spatial Pyramid Pooling" - full_name: "Spatial Pyramid Pooling" - description: "** Spatial Pyramid Pooling (SPP)** is a pooling layer that removes the fixed-size constraint of the network, i.e. a CNN does not require a fixed-size input image. Specifically, we add an SPP layer on top of the last convolutional layer. The SPP layer pools the features and generates fixed-length outputs, which are then fed into the fully-connected layers (or other classifiers). In other words, we perform some information aggregation at a deeper stage of the network hierarchy (between convolutional layers and fully-connected layers) to avoid the need for cropping or warping at the beginning." - } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. 
Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - methods: { - name: "Random Resized Crop" - full_name: "Random Resized Crop" - description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - } - video: { - video_id: "absOinFeGv0" - video_title: "PR-020: Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification" - number_of_likes: 14 - number_of_views: 1813 - published_date: { - seconds: 1499002058 - } - uploader: "Jiyang Kang" - } - } -} -pr_id_to_video: { - key: 21 - value: { - papers: { - paper_id: "batch-normalization-accelerating-deep-network" - title: "Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift" - arxiv_id: "1502.03167" - abstract: "Training Deep Neural Networks is complicated by the fact that the\ndistribution of each layer's inputs changes during training, as the parameters\nof the previous layers change. This slows down the training by requiring lower\nlearning rates and careful parameter initialization, and makes it notoriously\nhard to train models with saturating nonlinearities. We refer to this\nphenomenon as internal covariate shift, and address the problem by normalizing\nlayer inputs. Our method draws its strength from making normalization a part of\nthe model architecture and performing the normalization for each training\nmini-batch. Batch Normalization allows us to use much higher learning rates and\nbe less careful about initialization. It also acts as a regularizer, in some\ncases eliminating the need for Dropout. Applied to a state-of-the-art image\nclassification model, Batch Normalization achieves the same accuracy with 14\ntimes fewer training steps, and beats the original model by a significant\nmargin. Using an ensemble of batch-normalized networks, we improve upon the\nbest published result on ImageNet classification: reaching 4.9% top-5\nvalidation error (and 4.8% test error), exceeding the accuracy of human raters." 
- pub_date: { - seconds: 1423612800 - } - authors: "Sergey Ioffe" - authors: "Christian Szegedy" - repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/deeplab" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70334 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/simo-bat/Crack_detection" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - repositories: { - url: "https://github.com/dodoproptit99/deep-speaker" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Speaker identification with Deep Speaker" - } - repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/slim" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70334 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/KushajveerSingh/SPADE-PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 22 - description: "PyTorch unofficial implementation of Semantic Image Synthesis with Spatially-Adaptive Normalization paper by Nvidia Research" - } - repositories: { - url: "https://github.com/sayakpaul/Adaptive-Gradient-Clipping" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 57 - description: "Minimal implementation of adaptive gradient clipping (https://arxiv.org/abs/2102.06171) in TensorFlow 2. " - } - repositories: { - url: "https://github.com/osmr/imgclsmob" - framework: FRAMEWORK_OTHERS - number_of_stars: 2200 - description: "Sandbox for training deep learning networks" - } - repositories: { - url: "https://github.com/lab-ml/nn/tree/master/labml_nn/normalization/batch_norm" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3053 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." - } - repositories: { - url: "https://github.com/CPJKU/cca_layer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 31 - description: "Implementation of Canonical Correlation Analysis Layer for Cross-Modality Retrieval." - } - repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/seq_flow_lite" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70334 - description: "Models and examples built with TensorFlow" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Exponential Decay" - full_name: "Exponential Decay" - description: "**Exponential Decay** is a learning rate schedule where we decay the learning rate with more iterations using an exponential function:\r\n\r\n$$ \\text{lr} = \\text{lr}\\_{0}\\exp\\left(-kt\\right) $$\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. 
It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "Random Resized Crop" - full_name: "Random Resized Crop" - description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. 
This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - } - video: { - video_id: "TDx8iZHwFtM" - video_title: "PR-021: Batch Normalization (language: korean)" - number_of_likes: 103 - number_of_views: 8033 - published_date: { - seconds: 1499004604 - } - uploader: "Choung young jae" - } - } -} -pr_id_to_video: { - key: 22 - value: { - papers: { - paper_id: "infogan-interpretable-representation-learning" - title: "InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets" - arxiv_id: "1606.03657" - abstract: "This paper describes InfoGAN, an information-theoretic extension to the\nGenerative Adversarial Network that is able to learn disentangled\nrepresentations in a completely unsupervised manner. InfoGAN is a generative\nadversarial network that also maximizes the mutual information between a small\nsubset of the latent variables and the observation. We derive a lower bound to\nthe mutual information objective that can be optimized efficiently, and show\nthat our training procedure can be interpreted as a variation of the Wake-Sleep\nalgorithm. Specifically, InfoGAN successfully disentangles writing styles from\ndigit shapes on the MNIST dataset, pose from lighting of 3D rendered images,\nand background digits from the central digit on the SVHN dataset. It also\ndiscovers visual concepts that include hair styles, presence/absence of\neyeglasses, and emotions on the CelebA face dataset. Experiments show that\nInfoGAN learns interpretable representations that are competitive with\nrepresentations learned by existing fully supervised methods." - pub_date: { - seconds: 1465689600 - } - authors: "Xi Chen" - authors: "Yan Duan" - authors: "Rein Houthooft" - authors: "John Schulman" - authors: "Ilya Sutskever" - authors: "Pieter Abbeel" - repositories: { - url: "https://github.com/yashgarg98/GAN" - framework: FRAMEWORK_OTHERS - description: "Some implementations of Generative Adversarial Networks.(DCGAN, InfoGAN)" - } - repositories: { - url: "https://github.com/chandragupta0001/GAN/tree/master/info_gan" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/elingaard/infogan-mnist" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "PyTorch implementation of InfoGAN" - } - repositories: { - url: "https://github.com/amiryanj/socialways" - framework: FRAMEWORK_PYTORCH - number_of_stars: 87 - description: "Social Ways: Learning Multi-Modal Distributions of Pedestrian Trajectories with GANs (CVPR 2019)" - } - repositories: { - url: "https://github.com/Neptune-Trojans/GANs" - framework: FRAMEWORK_TENSORFLOW - description: "Implementation of different GANs architectures" - } - repositories: { - url: "https://github.com/zcemycl/Matlab-GAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 77 - description: "MATLAB implementations of Generative Adversarial Networks -- from GAN to Pixel2Pixel, CycleGAN" - } - repositories: { - url: "https://github.com/Evavanrooijen/InfoGAN-PyTorch" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/inkplatform/InfoGAN-PyTorch" - framework: FRAMEWORK_PYTORCH - description: "code for InfoGAN" - } - repositories: { - url: "https://github.com/vinoth654321/Casia-Webface" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: 
"https://github.com/bacdavid/InfomaxVAE" - framework: FRAMEWORK_OTHERS - description: "Obtain the latent variables that contain the maximal mutual information." - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "Tanh Activation" - full_name: "Tanh Activation" - description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" - } - methods: { - name: "Feedforward Network" - full_name: "Feedforward Network" - description: "A **Feedforward Network**, or a **Multilayer Perceptron (MLP)**, is a neural network with solely densely connected layers. This is the classic neural network architecture of the literature. It consists of inputs $x$ passed through units $h$ (of which there can be many layers) to predict a target $y$. Activation functions are generally chosen to be non-linear to allow for flexible functional approximation.\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "InfoGAN" - full_name: "InfoGAN" - description: "**InfoGAN** is a type of generative adversarial network that modifies the GAN objective to\r\nencourage it to learn interpretable and meaningful representations. 
This is done by maximizing the\r\nmutual information between a fixed small subset of the GAN’s noise variables and the observations.\r\n\r\nFormally, InfoGAN is defined as a minimax game with a variational regularization of mutual information and the hyperparameter $\\lambda$:\r\n\r\n$$ \\min\\_{G, Q}\\max\\_{D}V\\_{INFOGAN}\\left(D, G, Q\\right) = V\\left(D, G\\right) - \\lambda{L}\\_{I}\\left(G, Q\\right) $$\r\n\r\nWhere $Q$ is an auxiliary distribution that approximates the posterior $P\\left(c\\mid{x}\\right)$ - the probability of the latent code $c$ given the data $x$ - and $L\\_{I}$ is the variational lower bound of the mutual information between the latent code and the observations.\r\n\r\nIn the practical implementation, there is another fully-connected layer to output parameters for the conditional distribution $Q$ (negligible computation on top of regular GAN structures). Q is represented with a softmax non-linearity for a categorical latent code. For a continuous latent code, the authors assume a factored Gaussian." - } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - methods: { - name: "Leaky ReLU" - full_name: "Leaky ReLU" - description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we may suffer from sparse gradients, for example training generative adversarial networks." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "GAN" - full_name: "Generative Adversarial Network" - description: "A **GAN**, or **Generative Adversarial Network**, is a generative model that simultaneously trains\r\ntwo models: a generative model $G$ that captures the data distribution, and a discriminative model $D$ that estimates the\r\nprobability that a sample came from the training data rather than $G$.\r\n\r\nThe training procedure for $G$ is to maximize the probability of $D$ making\r\na mistake. This framework corresponds to a minimax two-player game. In the\r\nspace of arbitrary functions $G$ and $D$, a unique solution exists, with $G$\r\nrecovering the training data distribution and $D$ equal to $\\frac{1}{2}$\r\neverywhere. In the case where $G$ and $D$ are defined by multilayer perceptrons,\r\nthe entire system can be trained with backpropagation.
\r\n\r\n(Image Source: [here](http://www.kdnuggets.com/2017/01/generative-adversarial-networks-hot-topic-machine-learning.html))" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - } - video: { - video_id: "_4jbgniqt_Q" - video_title: "PR-022: InfoGAN (OpenAI)" - number_of_likes: 42 - number_of_views: 5905 - published_date: { - seconds: 1499608297 - } - uploader: "차준범" - } - } -} -pr_id_to_video: { - key: 23 - value: { - papers: { - paper_id: "yolo9000-better-faster-stronger" - title: "YOLO9000: Better, Faster, Stronger" - arxiv_id: "1612.08242" - abstract: "We introduce YOLO9000, a state-of-the-art, real-time object detection system\nthat can detect over 9000 object categories. First we propose various\nimprovements to the YOLO detection method, both novel and drawn from prior\nwork. The improved model, YOLOv2, is state-of-the-art on standard detection\ntasks like PASCAL VOC and COCO. At 67 FPS, YOLOv2 gets 76.8 mAP on VOC 2007. At\n40 FPS, YOLOv2 gets 78.6 mAP, outperforming state-of-the-art methods like\nFaster RCNN with ResNet and SSD while still running significantly faster.\nFinally we propose a method to jointly train on object detection and\nclassification. Using this method we train YOLO9000 simultaneously on the COCO\ndetection dataset and the ImageNet classification dataset. Our joint training\nallows YOLO9000 to predict detections for object classes that don't have\nlabelled detection data. We validate our approach on the ImageNet detection\ntask. YOLO9000 gets 19.7 mAP on the ImageNet detection validation set despite\nonly having detection data for 44 of the 200 classes. On the 156 classes not in\nCOCO, YOLO9000 gets 16.0 mAP. But YOLO can detect more than just 200 classes;\nit predicts detections for more than 9000 different object categories. And it\nstill runs in real-time." 
- pub_date: { - seconds: 1482624000 - } - authors: "Joseph Redmon" - authors: "Ali Farhadi" - repositories: { - url: "https://github.com/Qengineering/YoloV2-ncnn-Jetson-Nano" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - } - repositories: { - url: "https://github.com/Qengineering/YoloV2-ncnn-Raspberry-Pi-4" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "YoloV2 for bare Raspberry Pi 4" - } - repositories: { - url: "https://github.com/benjamintli/darknet-gun-detector" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/DavianYang/yolo.ai" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Yolo Implementation (v1, v2, v3)" - } - repositories: { - url: "https://github.com/preste-nakam/AI_whiteboard" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "The system which helps to transform any wall or flat surface into an interactive whiteboard just with an ordinary RGB camera and a hand! " - } - repositories: { - url: "https://github.com/preste-ai/camera_ai_whiteboard" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "Transform any wall to an intelligent whiteboard" - } - repositories: { - url: "https://gitlab.com/eavise/lightnet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 56 - description: "Darknet got illuminated by PyTorch ~ Meet Lightnet" - } - repositories: { - url: "https://github.com/drscotthawley/SPNet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Object detection for ESPI images of oscillating steelpan drums" - } - repositories: { - url: "https://github.com/Vijayabhaskar96/Object-Detection-Algorithms" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "My Implementations of Popular Object detection algorithms in Pytorch." - } - repositories: { - url: "https://github.com/Maskify/darknet" - framework: FRAMEWORK_TENSORFLOW - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "SSD" - full_name: "SSD" - description: "**SSD** is a single-stage object detection method that discretizes the output space of bounding boxes into a set of default boxes over different aspect ratios and scales per feature map location. At prediction time, the network generates scores for the presence of each object category in each default box and produces adjustments to the box to better match the object shape. Additionally, the network combines predictions from multiple feature maps with different resolutions to naturally handle objects of various sizes. 
\r\n\r\nThe fundamental improvement in speed comes from eliminating bounding box proposals and the subsequent pixel or feature resampling stage. Improvements over competing single-stage methods include using a small convolutional filter to predict object categories and offsets in bounding box locations, using separate predictors (filters) for different aspect ratio detections, and applying these filters to multiple feature maps from the later stages of a network in order to perform detection at multiple scales." - } - methods: { - name: "Fast-YOLOv2" - full_name: "Fast-YOLOv2" - } - methods: { - name: "Random Resized Crop" - full_name: "Random Resized Crop" - description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "Darknet-19" - full_name: "Darknet-19" - description: "**Darknet-19** is a convolutional neural network that is used as the backbone of [YOLOv2](https://paperswithcode.com/method/yolov2). Similar to the [VGG](https://paperswithcode.com/method/vgg) models it mostly uses $3 \\times 3$ filters and doubles the number of channels after every pooling step. Following the work on Network in Network (NIN) it uses global average pooling to make predictions as well as $1 \\times 1$ filters to compress the feature representation between $3 \\times 3$ convolutions. Batch Normalization is used to stabilize training, speed up convergence, and regularize the model." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Non Maximum Suppression" - full_name: "Non Maximum Suppression" - description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criterion is usually to discard entities that are below a given probability bound. With the remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box whose $\\text{IoU}$ with the box output in the previous step is $\\geq 0.5$.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" - } - methods: { - name: "ColorJitter" - full_name: "Color Jitter" - description: "**ColorJitter** is a type of image data augmentation where we randomly change the brightness, contrast and saturation of an image.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "SGD with Momentum" - full_name: "SGD with Momentum" - description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$.
The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" - } - methods: { - name: "Polynomial Rate Decay" - full_name: "Polynomial Rate Decay" - description: "**Polynomial Rate Decay** is a learning rate schedule where we polynomially decay the learning rate." - } - } - video: { - video_id: "6fdclSGgeio" - video_title: "PR-023: YOLO9000: Better, Faster, Stronger" - number_of_likes: 95 - number_of_views: 12509 - published_date: { - seconds: 1500299473 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 24 - value: { - papers: { - paper_id: "pixel-recurrent-neural-networks" - title: "Pixel Recurrent Neural Networks" - arxiv_id: "1601.06759" - abstract: "Modeling the distribution of natural images is a landmark problem in\nunsupervised learning. This task requires an image model that is at once\nexpressive, tractable and scalable. We present a deep neural network that\nsequentially predicts the pixels in an image along the two spatial dimensions.\nOur method models the discrete probability of the raw pixel values and encodes\nthe complete set of dependencies in the image. Architectural novelties include\nfast two-dimensional recurrent layers and an effective use of residual\nconnections in deep recurrent networks. We achieve log-likelihood scores on\nnatural images that are considerably better than the previous state of the art.\nOur main results also provide benchmarks on the diverse ImageNet dataset.\nSamples generated from the model appear crisp, varied and globally coherent." - pub_date: { - seconds: 1453680000 - } - authors: "Aaron van den Oord" - authors: "Nal Kalchbrenner" - authors: "Koray Kavukcuoglu" - repositories: { - url: "https://github.com/EugenHotaj/pytorch-generative/blob/master/pytorch_generative/models/autoregressive/pixel_cnn.py" - framework: FRAMEWORK_PYTORCH - number_of_stars: 142 - description: "Easy generative modeling in PyTorch." - } - repositories: { - url: "https://github.com/kamenbliznashki/pixel_models" - framework: FRAMEWORK_PYTORCH - number_of_stars: 18 - description: "Pytorch implementations of autoregressive pixel models - PixelCNN, PixelCNN++, PixelSNAIL" - } - repositories: { - url: "https://github.com/eyalbetzalel/pytorch-generative-v6" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/eyalbetzalel/pytorch-generative-v2" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/davidemartinelli/PixelCNN" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/doiodl/pixelcnn-rnn" - framework: FRAMEWORK_TENSORFLOW - description: "Реализация генеративных сетей PixelCNN и PixelRNN по оф. статье:https://arxiv.org/pdf/1601.06759.pdf . Стэк технологий: python, tensorflow и keras. 
Весь код был написан на google colab с tf 2.0" - } - repositories: { - url: "https://github.com/eyalbetzalel/pytorch-generative" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/tccnchsu/Artifical_Intelegent" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/vocong25/gated_pixelcnn" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/arcelien/hawc-deep-learning" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "Reproducing physics simulations on HAWC data with deep learning" - } - methods: { - name: "Masked Convolution" - full_name: "Masked Convolution" - description: "A **Masked Convolution** is a type of convolution which masks certain pixels so that the model can only predict based on pixels already seen. This type of convolution was introduced with PixelRNN generative models, where an image is generated pixel by pixel, to ensure that the model was conditional only on pixels already visited." - } - methods: { - name: "Tanh Activation" - full_name: "Tanh Activation" - description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" - } - methods: { - name: "LSTM" - full_name: "Long Short-Term Memory" - description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" - } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - methods: { - name: "PixelRNN" - full_name: "Pixel Recurrent Neural Network" - description: "**PixelRNNs** are generative neural networks that sequentially predicts the pixels in an image along the two spatial dimensions. They model the discrete probability of the raw pixel values and encode the complete set of dependencies in the image. Variants include the Row LSTM and the Diagonal BiLSTM, that scale more easily to larger datasets. Pixel values are treated as discrete random variables by using a softmax layer in the conditional distributions. 
Masked convolutions are employed to allow PixelRNNs to model full dependencies between the color channels." - } - } - video: { - video_id: "BvcwEz4VPIQ" - video_title: "PR-024: Pixel Recurrent Neural Network" - number_of_likes: 49 - number_of_views: 5536 - published_date: { - seconds: 1502156580 - } - uploader: "Jaejun Yoo" - } - } -} -pr_id_to_video: { - key: 25 - value: { - papers: { - paper_id: "online-sensor-hallucination-via-knowledge" - title: "Online Sensor Hallucination via Knowledge Distillation for Multimodal Image Classification" - arxiv_id: "1908.10559" - abstract: "We deal with the problem of information fusion driven satellite image/scene classification and propose a generic hallucination architecture considering that all the available sensor information are present during training while some of the image modalities may be absent while testing. It is well-known that different sensors are capable of capturing complementary information for a given geographical area and a classification module incorporating information from all the sources are expected to produce an improved performance as compared to considering only a subset of the modalities. However, the classical classifier systems inherently require all the features used to train the module to be present for the test instances as well, which may not always be possible for typical remote sensing applications (say, disaster management). As a remedy, we provide a robust solution in terms of a hallucination module that can approximate the missing modalities from the available ones during the decision-making stage. In order to ensure better knowledge transfer during modality hallucination, we explicitly incorporate concepts of knowledge distillation for the purpose of exploring the privileged (side) information in our framework and subsequently introduce an intuitive modular training approach. The proposed network is evaluated extensively on a large-scale corpus of PAN-MS image pairs (scene recognition) as well as on a benchmark hyperspectral image dataset (image classification) where we follow different experimental scenarios and find that the proposed hallucination based module indeed is capable of capturing the multi-source information, albeit the explicit absence of some of the sensor information, and aid in improved scene characterization." - pub_date: { - seconds: 1566950400 - } - authors: "Saurabh Kumar" - authors: "Biplab Banerjee" - authors: "Subhasis Chaudhuri" - } - video: { - video_id: "KdRo7ATNs9g" - video_title: "PR-025: Learning with side information through modality hallucination (2016)" - number_of_likes: 18 - number_of_views: 1823 - published_date: { - seconds: 1500818803 - } - uploader: "Terry TaeWoong Um" - } - } -} -pr_id_to_video: { - key: 26 - value: { - papers: { - paper_id: "u-net-convolutional-networks-for-biomedical" - title: "U-Net: Convolutional Networks for Biomedical Image Segmentation" - arxiv_id: "1505.04597" - abstract: "There is large consent that successful training of deep networks requires\nmany thousand annotated training samples. In this paper, we present a network\nand training strategy that relies on the strong use of data augmentation to use\nthe available annotated samples more efficiently. The architecture consists of\na contracting path to capture context and a symmetric expanding path that\nenables precise localization. 
We show that such a network can be trained\nend-to-end from very few images and outperforms the prior best method (a\nsliding-window convolutional network) on the ISBI challenge for segmentation of\nneuronal structures in electron microscopic stacks. Using the same network\ntrained on transmitted light microscopy images (phase contrast and DIC) we won\nthe ISBI cell tracking challenge 2015 in these categories by a large margin.\nMoreover, the network is fast. Segmentation of a 512x512 image takes less than\na second on a recent GPU. The full implementation (based on Caffe) and the\ntrained networks are available at\nhttp://lmb.informatik.uni-freiburg.de/people/ronneber/u-net ." - pub_date: { - seconds: 1431907200 - } - authors: "Olaf Ronneberger" - authors: "Philipp Fischer" - authors: "Thomas Brox" - repositories: { - url: "https://github.com/mateuszbuda/brain-segmentation-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 408 - description: "U-Net implementation in PyTorch for FLAIR abnormality segmentation in brain MRI" - } - repositories: { - url: "https://github.com/taha7ussein007/Papers_Implementation/tree/main/Paper_Implementation_From_Scratch/UNet_FromScratch_Pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "In this repo i'm going to practice implementing research, academic and business papers." - } - repositories: { - url: "https://github.com/ayushdabra/dubai-satellite-imagery-segmentation" - framework: FRAMEWORK_TENSORFLOW - description: "Multi-Class Semantic Segmentation on Dubai's Satellite Images." - } - repositories: { - url: "https://github.com/SahinTiryaki/Brain-tumor-segmentation-Vgg19UNet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Brain tumor segmentation was performed using the Tensorflow Keras api." - } - repositories: { - url: "https://github.com/sagnik1511/U-Net-Reduced-with-keras" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 9 - description: "Complete U-net Implementation with keras" - } - repositories: { - url: "https://github.com/osmr/imgclsmob" - framework: FRAMEWORK_OTHERS - number_of_stars: 2200 - description: "Sandbox for training deep learning networks" - } - repositories: { - url: "https://github.com/creeper121386/vielab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/zongyue-lu/pytorch-unet-family" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - } - repositories: { - url: "https://github.com/Aryavir07/Detecting-Brain-Tumor-Using-Deep-Learning" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Detecting Brain Tumor using Res-U-Net architecture. This would reduce the cost of cancer diagnosis and help in the early diagnosis of tumors which would essentially be a life saver." - } - repositories: { - url: "https://github.com/zarif101/blood_cell_segmentation_unet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "A Tensorflow/Keras project to segment white blood cells from microscope images!" 
- } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "U-Net" - full_name: "U-Net" - description: "**U-Net** is an architecture for semantic segmentation. It consists of a contracting path and an expansive path. The contracting path follows the typical architecture of a convolutional network. It consists of the repeated application of two 3x3 convolutions (unpadded convolutions), each followed by a rectified linear unit (ReLU) and a 2x2 max pooling operation with stride 2 for downsampling. At each downsampling step we double the number of feature channels. Every step in the expansive path consists of an upsampling of the feature map followed by a 2x2 convolution (“up-convolution”) that halves the number of feature channels, a concatenation with the correspondingly cropped feature map from the contracting path, and two 3x3 convolutions, each followed by a ReLU. The cropping is necessary due to the loss of border pixels in every convolution. At the final layer a 1x1 convolution is used to map each 64-component feature vector to the desired number of classes. In total the network has 23 convolutional layers." - } - methods: { - name: "Concatenated Skip Connection" - full_name: "Concatenated Skip Connection" - description: "A **Concatenated Skip Connection** is a type of skip connection that seeks to reuse features by concatenating them to new layers, allowing more information to be retained from previous layers of the network. This contrasts with say, residual connections, where element-wise summation is used instead to incorporate information from previous layers. This type of skip connection is prominently used in DenseNets (and also Inception networks), which the Figure to the right illustrates." 
- } - } - video: { - video_id: "ZdPBkPGfRSk" - video_title: "PR-026: Notes for CVPR Machine Learning Session" - number_of_likes: 9 - number_of_views: 1502 - published_date: { - seconds: 1501469470 - } - uploader: "Taegyun Jeon" - } - } -} -pr_id_to_video: { - key: 27 - value: { - papers: { - paper_id: "linking-glove-with-word2vec" - title: "Linking GloVe with word2vec" - arxiv_id: "1411.5595" - abstract: "The Global Vectors for word representation (GloVe), introduced by Jeffrey\nPennington et al. is reported to be an efficient and effective method for\nlearning vector representations of words. State-of-the-art performance is also\nprovided by skip-gram with negative-sampling (SGNS) implemented in the word2vec\ntool. In this note, we explain the similarities between the training objectives\nof the two models, and show that the objective of SGNS is similar to the\nobjective of a specialized form of GloVe, though their cost functions are\ndefined differently." - pub_date: { - seconds: 1416441600 - } - authors: "Tianze Shi" - authors: "Zhiyuan Liu" - methods: { - name: "GloVe" - full_name: "GloVe Embeddings" - description: "**GloVe Embeddings** are a type of word embedding that encode the co-occurrence probability ratio between two words as vector differences. GloVe uses a weighted least squares objective $J$ that minimizes the difference between the dot product of the vectors of two words and the logarithm of their number of co-occurrences:\r\n\r\n$$ J=\\sum\\_{i, j=1}^{V}f\\left(X\\_{ij}\\right)(w^{T}\\_{i}\\tilde{w}_{j} + b\\_{i} + \\tilde{b}\\_{j} - \\log{X}\\_{ij})^{2} $$\r\n\r\nwhere $w\\_{i}$ and $b\\_{i}$ are the word vector and bias respectively of word $i$, $\\tilde{w}_{j}$ and $\\tilde{b}\\_{j}$ are the context word vector and bias respectively of word $j$, $X\\_{ij}$ is the number of times word $i$ occurs in the context of word $j$, and $f$ is a weighting function that assigns lower weights to rare and frequent co-occurrences." - } - } - video: { - video_id: "uZ2GtEe-50E" - video_title: "PR-027:GloVe - Global vectors for word representation" - number_of_likes: 65 - number_of_views: 4316 - published_date: { - seconds: 1502026123 - } - uploader: "keun bong Kwak" - } - } -} -pr_id_to_video: { - key: 28 - value: { - papers: { - paper_id: "densely-connected-convolutional-networks" - title: "Densely Connected Convolutional Networks" - arxiv_id: "1608.06993" - abstract: "Recent work has shown that convolutional networks can be substantially\ndeeper, more accurate, and efficient to train if they contain shorter\nconnections between layers close to the input and those close to the output. In\nthis paper, we embrace this observation and introduce the Dense Convolutional\nNetwork (DenseNet), which connects each layer to every other layer in a\nfeed-forward fashion. Whereas traditional convolutional networks with L layers\nhave L connections - one between each layer and its subsequent layer - our\nnetwork has L(L+1)/2 direct connections. For each layer, the feature-maps of\nall preceding layers are used as inputs, and its own feature-maps are used as\ninputs into all subsequent layers. DenseNets have several compelling\nadvantages: they alleviate the vanishing-gradient problem, strengthen feature\npropagation, encourage feature reuse, and substantially reduce the number of\nparameters.
We evaluate our proposed architecture on four highly competitive\nobject recognition benchmark tasks (CIFAR-10, CIFAR-100, SVHN, and ImageNet).\nDenseNets obtain significant improvements over the state-of-the-art on most of\nthem, whilst requiring less computation to achieve high performance. Code and\npre-trained models are available at https://github.com/liuzhuang13/DenseNet ." - pub_date: { - seconds: 1472083200 - } - authors: "Gao Huang" - authors: "Zhuang Liu" - authors: "Laurens van der Maaten" - authors: "Kilian Q. Weinberger" - repositories: { - url: "https://github.com/Duplums/bhb10k-dl-benchmark" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "A Reproducible Benchmark for CNN Models on the BHB-10K Dataset" - } - repositories: { - url: "https://github.com/priyavrat-misra/xrays-and-gradcam" - framework: FRAMEWORK_PYTORCH - number_of_stars: 14 - description: "Classification and Gradient-based Localization of Chest Radiographs using PyTorch." - } - repositories: { - url: "https://github.com/cmasch/densenet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 38 - description: "Implementation of Densely Connected Convolutional Network with Keras and TensorFlow." - } - repositories: { - url: "https://github.com/lpirola13/flower-recognizer" - framework: FRAMEWORK_TENSORFLOW - description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." - } - repositories: { - url: "https://github.com/bozliu/E2E-Keyword-Spotting" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Wake-Up Keyword Detection With End To End Deep Neural Networks" - } - repositories: { - url: "https://github.com/pytorch/vision" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9293 - description: "Datasets, Transforms and Models specific to Computer Vision" - } - repositories: { - url: "https://github.com/lpirola13/flower_recognizer" - framework: FRAMEWORK_TENSORFLOW - description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." - } - repositories: { - url: "https://github.com/osmr/imgclsmob" - framework: FRAMEWORK_OTHERS - number_of_stars: 2200 - description: "Sandbox for training deep learning networks" - } - repositories: { - url: "https://github.com/niranjana98/Image-Classification" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - framework: FRAMEWORK_OTHERS - number_of_stars: 1302 - description: "A treasure chest for visual recognition powered by PaddlePaddle" - } - methods: { - name: "Nesterov Accelerated Gradient" - full_name: "Nesterov Accelerated Gradient" - description: "**Nesterov Accelerated Gradient** is a momentum-based SGD optimizer that \"looks ahead\" to where the parameters will be to calculate the gradient **ex post** rather than **ex ante**:\r\n\r\n$$ v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta-\\gamma{v\\_{t-1}}\\right) $$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} + v\\_{t}$$\r\n\r\nLike SGD with momentum $\\gamma$ is usually set to $0.9$.\r\n\r\nThe intuition is that the [standard momentum](https://paperswithcode.com/method/sgd-with-momentum) method first computes the gradient at the current location and then takes a big jump in the direction of the updated accumulated gradient. 
In contrast Nesterov momentum first makes a big jump in the direction of the previous accumulated gradient and then measures the gradient where it ends up and makes a correction. The idea being that it is better to correct a mistake after you have made it. \r\n\r\nImage Source: [Geoff Hinton lecture notes](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)" - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Concatenated Skip Connection" - full_name: "Concatenated Skip Connection" - description: "A **Concatenated Skip Connection** is a type of skip connection that seeks to reuse features by concatenating them to new layers, allowing more information to be retained from previous layers of the network. This contrasts with say, residual connections, where element-wise summation is used instead to incorporate information from previous layers. This type of skip connection is prominently used in DenseNets (and also Inception networks), which the Figure to the right illustrates." 
- } - methods: { - name: "DenseNet" - full_name: "DenseNet" - description: "A **DenseNet** is a type of convolutional neural network that utilises dense connections between layers, through [Dense Blocks](http://www.paperswithcode.com/method/dense-block), where we connect *all layers* (with matching feature-map sizes) directly with each other. To preserve the feed-forward nature, each layer obtains additional inputs from all preceding layers and passes on its own feature-maps to all subsequent layers." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - } - video: { - video_id: "fe2Vn0mwALI" - video_title: "PR-028: Densely Connected Convolutional Networks (CVPR 2017, Best Paper Award) by Gao Huang et al." 
- number_of_likes: 124 - number_of_views: 12472 - published_date: { - seconds: 1502159004 - } - uploader: "Sung Kim" - } - } -} -pr_id_to_video: { - key: 29 - value: { - papers: { - paper_id: "apprenticeship-learning-using-inverse" - title: "Apprenticeship Learning using Inverse Reinforcement Learning and Gradient Methods" - arxiv_id: "1206.5264" - abstract: "In this paper we propose a novel gradient algorithm to learn a policy from an expert's observed behavior assuming that the expert behaves optimally with respect to some unknown reward function of a Markovian Decision Problem. The algorithm's aim is to find a reward function such that the resulting optimal policy matches well the expert's observed behavior. The main difficulty is that the mapping from the parameters to policies is both nonsmooth and highly redundant. Resorting to subdifferentials solves the first difficulty, while the second one is over- come by computing natural gradients. We tested the proposed method in two artificial domains and found it to be more reliable and efficient than some previous methods." - pub_date: { - seconds: 1340150400 - } - authors: "Gergely Neu" - authors: "Csaba Szepesvari" - } - video: { - video_id: "AXi4s3aFN6M" - video_title: "PR-029: Apprenticeship Learning via Inverse Reinforcement Learning" - number_of_likes: 17 - number_of_views: 2086 - published_date: { - seconds: 1505165154 - } - uploader: "Suh Kiho" - } - } -} -pr_id_to_video: { - key: 30 - value: { - papers: { - paper_id: "photo-realistic-single-image-super-resolution" - title: "Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network" - arxiv_id: "1609.04802" - abstract: "Despite the breakthroughs in accuracy and speed of single image\nsuper-resolution using faster and deeper convolutional neural networks, one\ncentral problem remains largely unsolved: how do we recover the finer texture\ndetails when we super-resolve at large upscaling factors? The behavior of\noptimization-based super-resolution methods is principally driven by the choice\nof the objective function. Recent work has largely focused on minimizing the\nmean squared reconstruction error. The resulting estimates have high peak\nsignal-to-noise ratios, but they are often lacking high-frequency details and\nare perceptually unsatisfying in the sense that they fail to match the fidelity\nexpected at the higher resolution. In this paper, we present SRGAN, a\ngenerative adversarial network (GAN) for image super-resolution (SR). To our\nknowledge, it is the first framework capable of inferring photo-realistic\nnatural images for 4x upscaling factors. To achieve this, we propose a\nperceptual loss function which consists of an adversarial loss and a content\nloss. The adversarial loss pushes our solution to the natural image manifold\nusing a discriminator network that is trained to differentiate between the\nsuper-resolved images and original photo-realistic images. In addition, we use\na content loss motivated by perceptual similarity instead of similarity in\npixel space. Our deep residual network is able to recover photo-realistic\ntextures from heavily downsampled images on public benchmarks. An extensive\nmean-opinion-score (MOS) test shows hugely significant gains in perceptual\nquality using SRGAN. The MOS scores obtained with SRGAN are closer to those of\nthe original high-resolution images than to those obtained with any\nstate-of-the-art method." 
- pub_date: { - seconds: 1473897600 - } - authors: "Christian Ledig" - authors: "Lucas Theis" - authors: "Ferenc Huszar" - authors: "Jose Caballero" - authors: "Andrew Cunningham" - authors: "Alejandro Acosta" - authors: "Andrew Aitken" - authors: "Alykhan Tejani" - authors: "Johannes Totz" - authors: "Zehan Wang" - authors: "Wenzhe Shi" - repositories: { - url: "https://github.com/chaoxu0512/Pushbroom-satellite-image-SRGAN" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/omkarghugarkar007/Neural_Super_Sampling" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "An attempt to upsample images by a factor of 4 using GAN" - } - repositories: { - url: "https://github.com/AntonioAlgaida/Edge.SRGAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "A solution of SISR that merges the ideas of SRGAN and Edge Informed SISR. This solution was presented on 1st SpainAI hackathon obtain 4th position." - } - repositories: { - url: "https://github.com/Idelcads/Super_Resolution_overview" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/Idelcads/IMKI_Technical_test" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/akanametov/SuperResolution" - framework: FRAMEWORK_PYTORCH - description: "A SuperResolution GAN trained on STL10 dataset" - } - repositories: { - url: "https://github.com/akanametov/Pix2Pix" - framework: FRAMEWORK_PYTORCH - description: "A Pix2Pix GAN trained on Facades dataset" - } - repositories: { - url: "https://github.com/TanyaChutani/Image-Super-Resolution-SRGAN-TF2.0" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "A Tensorflow2.0 implementation of Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network" - } - repositories: { - url: "https://github.com/BearNinja123/esrGAN_vBearNinja123" - framework: FRAMEWORK_TENSORFLOW - description: "My implementation of the srGAN and esrGAN models." - } - repositories: { - url: "https://github.com/wkhademi/ImageEnhancement" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 37 - description: "Various models for handling underexposure, overexposure, super-resolution, shadow removal, etc." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "VGG" - full_name: "VGG" - description: "**VGG** is a classical convolutional neural network architecture. 
It was based on an analysis of how to increase the depth of such networks. The network utilises small 3 x 3 filters. Otherwise the network is characterized by its simplicity: the only other components being pooling layers and a fully connected layer.\r\n\r\nImage: [Davi Frossard](https://www.cs.toronto.edu/frossard/post/vgg16/)" - } - methods: { - name: "SRGAN Residual Block" - full_name: "SRGAN Residual Block" - description: "**SRGAN Residual Block** is a residual block used in the [SRGAN](https://paperswithcode.com/method/srgan#) generator for image super-resolution. It is similar to standard [residual blocks](https://paperswithcode.com/method/residual-block), although it uses a [PReLU](https://paperswithcode.com/method/prelu) activation function to help training (preventing sparse gradients during GAN training)." - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD with Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "VGG Loss" - full_name: "VGG Loss" - description: "**VGG Loss** is a type of content loss introduced in the [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://paperswithcode.com/paper/perceptual-losses-for-real-time-style) super-resolution and style transfer framework. It is an alternative to pixel-wise losses; VGG Loss attempts to be closer to perceptual similarity. The VGG loss is based on the ReLU activation layers of the pre-trained 19 layer VGG network. With $\\phi\\_{i,j}$ we indicate the feature map obtained by the $j$-th convolution (after activation) before the $i$-th maxpooling layer within the VGG19 network, which we consider given. We then define the VGG loss as the Euclidean distance between the feature representations of a reconstructed image $G\\_{\\theta\\_{G}}\\left(I^{LR}\\right)$ and the reference image $I^{HR}$:\r\n\r\n$$ l\\_{VGG/i.j} = \\frac{1}{W\\_{i,j}H\\_{i,j}}\\sum\\_{x=1}^{W\\_{i,j}}\\sum\\_{y=1}^{H\\_{i,j}}\\left(\\phi\\_{i,j}\\left(I^{HR}\\right)\\_{x, y} - \\phi\\_{i,j}\\left(G\\_{\\theta\\_{G}}\\left(I^{LR}\\right)\\right)\\_{x, y}\\right)^{2}$$ \r\n\r\nHere $W\\_{i,j}$ and $H\\_{i,j}$ describe the dimensions of the respective feature maps within the VGG network." - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). 
At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "SRGAN" - full_name: "SRGAN" - description: "**SRGAN** is a generative adversarial network for single image super-resolution. It uses a perceptual loss function which consists of an adversarial loss and a content loss. The adversarial loss pushes the solution to the natural image manifold using a discriminator network that is trained to differentiate between the super-resolved images and original photo-realistic images. In addition, the authors use a content loss motivated by perceptual similarity instead of similarity in pixel space. 
The actual networks - depicted in the Figure to the right - consist mainly of residual blocks for feature extraction.\r\n\r\nFormally we write the perceptual loss function as a weighted sum of a (VGG) content loss $l^{SR}\\_{X}$ and an adversarial loss component $l^{SR}\\_{Gen}$:\r\n\r\n$$ l^{SR} = l^{SR}\\_{X} + 10^{-3}l^{SR}\\_{Gen} $$" - } - } - video: { - video_id: "nGPMKnoJTcI" - video_title: "PR-030: Photo-Realistic Single Image Super Resolution Using a Generative Adversarial Network" - number_of_likes: 24 - number_of_views: 2797 - published_date: { - seconds: 1502636018 - } - uploader: "Seungil Kim" - } - } -} -pr_id_to_video: { - key: 31 - value: { - papers: { - paper_id: "learning-to-learn-by-gradient-descent-by" - title: "Learning to learn by gradient descent by gradient descent" - arxiv_id: "1606.04474" - abstract: "The move from hand-designed features to learned features in machine learning\nhas been wildly successful. In spite of this, optimization algorithms are still\ndesigned by hand. In this paper we show how the design of an optimization\nalgorithm can be cast as a learning problem, allowing the algorithm to learn to\nexploit structure in the problems of interest in an automatic way. Our learned\nalgorithms, implemented by LSTMs, outperform generic, hand-designed competitors\non the tasks for which they are trained, and also generalize well to new tasks\nwith similar structure. We demonstrate this on a number of tasks, including\nsimple convex problems, training neural networks, and styling images with\nneural art." - pub_date: { - seconds: 1465862400 - } - authors: "Marcin Andrychowicz" - authors: "Misha Denil" - authors: "Sergio Gomez" - authors: "Matthew W. Hoffman" - authors: "David Pfau" - authors: "Tom Schaul" - authors: "Brendan Shillingford" - authors: "Nando de Freitas" - repositories: { - is_official: true - url: "https://github.com/deepmind/learning-to-learn" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4021 - description: "Learning to Learn in TensorFlow" - } - repositories: { - url: "https://github.com/chenwydj/learning-to-learn-by-gradient-descent-by-gradient-descent" - framework: FRAMEWORK_PYTORCH - number_of_stars: 31 - description: "Pytorch version of NIPS'16 \"Learning to learn by gradient descent by gradient descent\"" - } - repositories: { - url: "https://github.com/yangsenius/learning-to-learn-by-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 35 - description: "\"Learning to learn by gradient descent by gradient descent \"by PyTorch -- a simple re-implementation." - } - } - video: { - video_id: "p55H46RiZ6k" - video_title: "PR-031: Learning to learn by gradient descent by gradient descent" - number_of_likes: 16 - number_of_views: 2373 - published_date: { - seconds: 1504453983 - } - uploader: "차준범" - } - } -} -pr_id_to_video: { - key: 32 - value: { - papers: { - paper_id: "deep-visual-semantic-alignments-for" - title: "Deep Visual-Semantic Alignments for Generating Image Descriptions" - arxiv_id: "1412.2306" - abstract: "We present a model that generates natural language descriptions of images and\ntheir regions. Our approach leverages datasets of images and their sentence\ndescriptions to learn about the inter-modal correspondences between language\nand visual data. 
Our alignment model is based on a novel combination of\nConvolutional Neural Networks over image regions, bidirectional Recurrent\nNeural Networks over sentences, and a structured objective that aligns the two\nmodalities through a multimodal embedding. We then describe a Multimodal\nRecurrent Neural Network architecture that uses the inferred alignments to\nlearn to generate novel descriptions of image regions. We demonstrate that our\nalignment model produces state of the art results in retrieval experiments on\nFlickr8K, Flickr30K and MSCOCO datasets. We then show that the generated\ndescriptions significantly outperform retrieval baselines on both full images\nand on a new dataset of region-level annotations." - pub_date: { - seconds: 1417910400 - } - authors: "Andrej Karpathy" - authors: "Li Fei-Fei" - repositories: { - url: "https://github.com/IzabelaKrupinska/PROJBAD" - framework: FRAMEWORK_OTHERS - description: "Pliki do projektu badawczego." - } - repositories: { - url: "https://github.com/VinitSR7/Image-Caption-Generation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 12 - description: "Image Captioning: Implementing the Neural Image Caption Generator" - } - } - video: { - video_id: "Q-Cm7nw85iE" - video_title: "PR-032: Deep Visual-Semantic Alignments for Generating Image Descriptions" - number_of_likes: 13 - number_of_views: 2031 - published_date: { - seconds: 1504445734 - } - uploader: "Jiyang Kang" - } - } -} -pr_id_to_video: { - key: 33 - value: { - papers: { - paper_id: "pvanet-lightweight-deep-neural-networks-for" - title: "PVANet: Lightweight Deep Neural Networks for Real-time Object Detection" - arxiv_id: "1611.08588" - abstract: "In object detection, reducing computational cost is as important as improving\naccuracy for most practical usages. This paper proposes a novel network\nstructure, which is an order of magnitude lighter than other state-of-the-art\nnetworks while maintaining the accuracy. Based on the basic principle of more\nlayers with less channels, this new deep neural network minimizes its\nredundancy by adopting recent innovations including C.ReLU and Inception\nstructure. We also show that this network can be trained efficiently to achieve\nsolid results on well-known object detection benchmarks: 84.9% and 84.2% mAP on\nVOC2007 and VOC2012 while the required compute is less than 10% of the recent\nResNet-101." 
- pub_date: { - seconds: 1479859200 - } - authors: "Sanghoon Hong" - authors: "Byungseok Roh" - authors: "Kye-Hyeon Kim" - authors: "Yeongjae Cheon" - authors: "Minje Park" - repositories: { - is_official: true - url: "https://github.com/sanghoon/pva-faster-rcnn" - framework: FRAMEWORK_OTHERS - number_of_stars: 656 - description: "Demo code for PVANet" - } - repositories: { - url: "https://github.com/busyboxs/Some-resources-useful-for-me" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/jeffshih/autoTrain" - framework: FRAMEWORK_OTHERS - description: "training tool for faster rcnn" - } - repositories: { - url: "https://github.com/wuyx/pva-faster-rcnn" - framework: FRAMEWORK_OTHERS - } - } - video: { - video_id: "TYDGTnxUGHQ" - video_title: "PR-033: PVANet: Lightweight Deep Neural Networks for Real-time Object Detection" - number_of_likes: 25 - number_of_views: 3382 - published_date: { - seconds: 1504446966 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 34 - value: { - papers: { - paper_id: "xception-deep-learning-with-depthwise" - title: "Xception: Deep Learning with Depthwise Separable Convolutions" - arxiv_id: "1610.02357" - abstract: "We present an interpretation of Inception modules in convolutional neural\nnetworks as being an intermediate step in-between regular convolution and the\ndepthwise separable convolution operation (a depthwise convolution followed by\na pointwise convolution). In this light, a depthwise separable convolution can\nbe understood as an Inception module with a maximally large number of towers.\nThis observation leads us to propose a novel deep convolutional neural network\narchitecture inspired by Inception, where Inception modules have been replaced\nwith depthwise separable convolutions. We show that this architecture, dubbed\nXception, slightly outperforms Inception V3 on the ImageNet dataset (which\nInception V3 was designed for), and significantly outperforms Inception V3 on a\nlarger image classification dataset comprising 350 million images and 17,000\nclasses. Since the Xception architecture has the same number of parameters as\nInception V3, the performance gains are not due to increased capacity but\nrather to a more efficient use of model parameters." 
- pub_date: { - seconds: 1475798400 - } - authors: "François Chollet" - repositories: { - url: "https://github.com/osmr/imgclsmob" - framework: FRAMEWORK_OTHERS - number_of_stars: 2200 - description: "Sandbox for training deep learning networks" - } - repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/deeplab" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70334 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11022 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" - } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - framework: FRAMEWORK_OTHERS - number_of_stars: 1302 - description: "A treasure chest for visual recognition powered by PaddlePaddle" - } - repositories: { - url: "https://github.com/amogh7joshi/engagement-detection" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "Engagement Detection, including facial detection and emotion recognition, using CNNs/LSTMs." - } - repositories: { - url: "https://github.com/amogh7joshi/fer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "Engagement Detection, including facial detection and emotion recognition, using CNNs/LSTMs." - } - repositories: { - url: "https://github.com/ced-kin/dog-breed-ai" - framework: FRAMEWORK_TENSORFLOW - description: "android application for classifying dog breeds" - } - repositories: { - url: "https://github.com/krishnakarthi/COVID-19_Prediction" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Diagnose the COVID-19 from patient’s chest X-ray's using Convolution Neural Networks (CNN) Deep Transfer Learning technique in Azure ML workspace" - } - repositories: { - url: "https://github.com/bluejurand/Photos-colorization" - framework: FRAMEWORK_TENSORFLOW - description: "Keras repository which colorize black-white images." - } - repositories: { - url: "https://github.com/zotrick/Pneumonia_classification_Xception" - framework: FRAMEWORK_TENSORFLOW - description: "This projects uses Xception CNN for pneumonia classification with competitive results." - } - methods: { - name: "Pointwise Convolution" - full_name: "Pointwise Convolution" - description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. 
It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function comprising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through the objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. 
It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - methods: { - name: "Step Decay" - full_name: "Step Decay" - description: "**Step Decay** is a learning rate schedule that drops the learning rate by a factor every few epochs, where the number of epochs is a hyperparameter.\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" - } - methods: { - name: "SGD with Momentum" - full_name: "SGD with Momentum" - description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" - } - } - video: { - video_id: "V0dLhyg5_Dw" - video_title: "PR-034: Inception and Xception" - number_of_likes: 79 - number_of_views: 10199 - published_date: { - seconds: 1505052461 - } - uploader: "Jaejun Yoo" - } - } -} -pr_id_to_video: { - key: 35 - value: { - papers: { - paper_id: "understanding-black-box-predictions-via" - title: "Understanding Black-box Predictions via Influence Functions" - arxiv_id: "1703.04730" - abstract: "How can we explain the predictions of a black-box model? In this paper, we use influence functions -- a classic technique from robust statistics -- to trace a model's prediction through the learning algorithm and back to its training data, thereby identifying training points most responsible for a given prediction. 
To scale up influence functions to modern machine learning settings, we develop a simple, efficient implementation that requires only oracle access to gradients and Hessian-vector products. We show that even on non-convex and non-differentiable models where the theory breaks down, approximations to influence functions can still provide valuable information. On linear models and convolutional neural networks, we demonstrate that influence functions are useful for multiple purposes: understanding model behavior, debugging models, detecting dataset errors, and even creating visually-indistinguishable training-set attacks." - pub_date: { - seconds: 1489449600 - } - authors: "Pang Wei Koh" - authors: "Percy Liang" - repositories: { - url: "https://github.com/4pygmalion/Federated_learning-filtering-non-influence-data" - framework: FRAMEWORK_TENSORFLOW - description: "Federated learning with influence function" - } - repositories: { - url: "https://github.com/nimarb/pytorch_influence_functions" - framework: FRAMEWORK_PYTORCH - number_of_stars: 121 - description: "This is a PyTorch reimplementation of Influence Functions from the ICML2017 best paper: Understanding Black-box Predictions via Influence Functions by Pang Wei Koh and Percy Liang." - } - repositories: { - url: "https://github.com/kohpangwei/influence-release" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 581 - } - repositories: { - is_official: true - url: "https://worksheets.codalab.org/worksheets/0x2b314dc3536b482dbba02783a24719fd" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/Timothy-Ye/example-based-explanation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "CST Part II Project: Example Based Explanation in Machine Learning" - } - repositories: { - url: "https://github.com/Shmoo137/Interpretable-Phase-Classification" - framework: FRAMEWORK_PYTORCH - number_of_stars: 8 - description: "The repository accompanying the research paper \"Phase Detection with Neural Networks: Interpreting the Black Box\" by A. Dawid, P. Huembeli, M. Tomza, M. Lewenstein, and A. Dauphin" - } - repositories: { - url: "https://github.com/TooTouch/WhiteBox-Part2" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "The White Box Project is a project that introduces many ways to solve the part of the black box of machine learning. This project is based on Interpretable Machine Learning by Christoph Molnar. I recommend you to read the book first and practice this project." 
- } - repositories: { - url: "https://github.com/bsharchilev/influence_boosting" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 54 - description: "Supporting code for the paper \"Finding Influential Training Samples for Gradient Boosted Decision Trees\"" - } - repositories: { - url: "https://github.com/ShinKyuY/Understanding-Black-box-Predictions-via-Influence-Functions-tutorial-MNIST-7-vs-1-Classification" - framework: FRAMEWORK_OTHERS - number_of_stars: 8 - description: "Tiny Tutorial on https://arxiv.org/abs/1703.04730" - } - repositories: { - url: "https://github.com/darkonhub/darkon" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 234 - description: "Toolkit to Hack Your Deep Learning Models" - } - } - video: { - video_id: "xlmlY8WHjkU" - video_title: "PR-035: Understanding Black-box Predictions via Influence Functions (2017)" - number_of_likes: 26 - number_of_views: 3394 - published_date: { - seconds: 1505051523 - } - uploader: "Terry TaeWoong Um" - } - } -} -pr_id_to_video: { - key: 36 - value: { - papers: { - paper_id: "learning-to-remember-rare-events" - title: "Learning to Remember Rare Events" - arxiv_id: "1703.03129" - abstract: "Despite recent advances, memory-augmented deep neural networks are still\nlimited when it comes to life-long and one-shot learning, especially in\nremembering rare events. We present a large-scale life-long memory module for\nuse in deep learning. The module exploits fast nearest-neighbor algorithms for\nefficiency and thus scales to large memory sizes. Except for the\nnearest-neighbor query, the module is fully differentiable and trained\nend-to-end with no extra supervision. It operates in a life-long manner, i.e.,\nwithout the need to reset it during training.\n Our memory module can be easily added to any part of a supervised neural\nnetwork. To show its versatility we add it to a number of networks, from simple\nconvolutional ones tested on image classification to deep sequence-to-sequence\nand recurrent-convolutional models. In all cases, the enhanced network gains\nthe ability to remember and do life-long one-shot learning. Our module\nremembers training examples shown many thousands of steps in the past and it\ncan successfully generalize from them. We set new state-of-the-art for one-shot\nlearning on the Omniglot dataset and demonstrate, for the first time, life-long\none-shot learning in recurrent neural networks on a large-scale machine\ntranslation task." - pub_date: { - seconds: 1489017600 - } - authors: "Łukasz Kaiser" - authors: "Ofir Nachum" - authors: "Aurko Roy" - authors: "Samy Bengio" - repositories: { - is_official: true - url: "https://github.com/tensorflow/models" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70333 - description: "Models and examples built with TensorFlow" - } - } - video: { - video_id: "S_fbBYbXypc" - video_title: "PR-036: Learning to Remember Rare Events" - number_of_likes: 7 - number_of_views: 1488 - published_date: { - seconds: 1505657142 - } - uploader: "Taegyun Jeon" - } - } -} -pr_id_to_video: { - key: 37 - value: { - papers: { - paper_id: "ask-me-anything-dynamic-memory-networks-for" - title: "Ask Me Anything: Dynamic Memory Networks for Natural Language Processing" - arxiv_id: "1506.07285" - abstract: "Most tasks in natural language processing can be cast into question answering\n(QA) problems over language input. 
We introduce the dynamic memory network\n(DMN), a neural network architecture which processes input sequences and\nquestions, forms episodic memories, and generates relevant answers. Questions\ntrigger an iterative attention process which allows the model to condition its\nattention on the inputs and the result of previous iterations. These results\nare then reasoned over in a hierarchical recurrent sequence model to generate\nanswers. The DMN can be trained end-to-end and obtains state-of-the-art results\non several types of tasks and datasets: question answering (Facebook's bAbI\ndataset), text classification for sentiment analysis (Stanford Sentiment\nTreebank) and sequence modeling for part-of-speech tagging (WSJ-PTB). The\ntraining for these different tasks relies exclusively on trained word vector\nrepresentations and input-question-answer triplets." - pub_date: { - seconds: 1435104000 - } - authors: "Ankit Kumar" - authors: "Ozan Irsoy" - authors: "Peter Ondruska" - authors: "Mohit Iyyer" - authors: "James Bradbury" - authors: "Ishaan Gulrajani" - authors: "Victor Zhong" - authors: "Romain Paulus" - authors: "Richard Socher" - repositories: { - url: "https://github.com/DongjunLee/dmn-tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 41 - description: "TensorFlow implementation of 'Ask Me Anything: Dynamic Memory Networks for Natural Language Processing (2015)'" - } - repositories: { - url: "https://github.com/scakc/QAwiki" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Question Answering models that can get an answer from Wikipedia pages and select a sentence subset as a reply to your question." - } - repositories: { - url: "https://github.com/navodhya/DMN" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/macco3k/deepstories" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/radiodee1/awesome-chatbot" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 13 - description: "work in progress - python Keras, Tensorflow, or Pytorch implementation of a chatbot or possibly smart-speaker" - } - repositories: { - url: "https://github.com/rgsachin/DMTN" - framework: FRAMEWORK_OTHERS - number_of_stars: 13 - } - repositories: { - url: "https://github.com/Asteur/someChatbot" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/jxz542189/dmn_plus" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Dynamic memory network tensorflow tf.data tf.estimator" - } - repositories: { - url: "https://github.com/ajenningsfrankston/Dynamic-Memory-Network-Plus-master" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/vchudinov/dynamic_memory_networks_with_keras" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "Keras implementation of the dynamic memory networks from https://arxiv.org/pdf/1603.01417.pdf" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "GRU" - full_name: "Gated Recurrent Unit" - description: "A **Gated Recurrent Unit**, or **GRU**, is a type of recurrent neural network. 
It is similar to an [LSTM](https://paperswithcode.com/method/lstm), but only has two gates - a reset gate and an update gate - and notably lacks an output gate. Fewer parameters means GRUs are generally easier/faster to train than their LSTM counterparts.\r\n\r\nImage Source: [here](https://www.google.com/url?sa=i&url=https%3A%2F%2Fcommons.wikimedia.org%2Fwiki%2FFile%3AGated_Recurrent_Unit%2C_type_1.svg&psig=AOvVaw3EmNX8QXC5hvyxeenmJIUn&ust=1590332062671000&source=images&cd=vfe&ved=0CA0QjhxqFwoTCMiev9-eyukCFQAAAAAdAAAAABAR)" - } - methods: { - name: "Dynamic Memory Network" - full_name: "Dynamic Memory Network" - description: "A **Dynamic Memory Network** is a neural network architecture which processes input sequences and questions, forms episodic memories, and generates relevant answers. Questions trigger an iterative attention process which allows the model to condition its attention on the inputs and the result of previous iterations. These results are then reasoned over in a hierarchical recurrent sequence model to generate answers. \r\n\r\nThe DMN consists of a number of modules:\r\n\r\n- Input Module: The input module encodes raw text inputs from the task into distributed vector representations. The input takes forms like a sentence, a long story, a movie review and so on.\r\n- Question Module: The question module encodes the question of the task into a distributed\r\nvector representation. For question answering, the question may be a sentence such as \"Where did the author first fly?\". The representation is fed into the episodic memory module, and forms the basis, or initial state, upon which the episodic memory module iterates.\r\n- Episodic Memory Module: Given a collection of input representations, the episodic memory module chooses which parts of the inputs to focus on through the attention mechanism. It then produces a ”memory” vector representation taking into account the question as well as the previous memory. Each iteration provides the module with newly relevant information about the input. In other words,\r\nthe module has the ability to retrieve new information, in the form of input representations, which were thought to be irrelevant in previous iterations.\r\n- Answer Module: The answer module generates an answer from the final memory vector of the memory module." - } - } - video: { - video_id: "oxSrjuspQEs" - video_title: "PR-037: Ask me anything: Dynamic memory networks for natural language processing" - number_of_likes: 24 - number_of_views: 2364 - published_date: { - seconds: 1505654553 - } - uploader: "keun bong Kwak" - } - } -} -pr_id_to_video: { - key: 38 - value: { - papers: { - paper_id: "explaining-and-harnessing-adversarial" - title: "Explaining and Harnessing Adversarial Examples" - arxiv_id: "1412.6572" - abstract: "Several machine learning models, including neural networks, consistently\nmisclassify adversarial examples---inputs formed by applying small but\nintentionally worst-case perturbations to examples from the dataset, such that\nthe perturbed input results in the model outputting an incorrect answer with\nhigh confidence. Early attempts at explaining this phenomenon focused on\nnonlinearity and overfitting. We argue instead that the primary cause of neural\nnetworks' vulnerability to adversarial perturbation is their linear nature.\nThis explanation is supported by new quantitative results while giving the\nfirst explanation of the most intriguing fact about them: their generalization\nacross architectures and training sets. 
Moreover, this view yields a simple and\nfast method of generating adversarial examples. Using this approach to provide\nexamples for adversarial training, we reduce the test set error of a maxout\nnetwork on the MNIST dataset." - pub_date: { - seconds: 1419033600 - } - authors: "Ian J. Goodfellow" - authors: "Jonathon Shlens" - authors: "Christian Szegedy" - repositories: { - url: "https://github.com/anirudh9784/Adversarial-Defense" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - repositories: { - url: "https://github.com/anirudh9784/Major_Project" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - repositories: { - url: "https://github.com/openai/cleverhans" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5154 - description: "An adversarial example library for constructing attacks, building defenses, and benchmarking both" - } - repositories: { - url: "https://github.com/cleverhans-lab/cleverhans" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5154 - description: "An adversarial example library for constructing attacks, building defenses, and benchmarking both" - } - repositories: { - url: "https://github.com/dunky11/adversarial-frontier-stitching" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "Implementation of \"Adversarial Frontier Stitching for Remote Neural Network Watermarking\" in TensorFlow." - } - repositories: { - url: "https://github.com/Jupetus/ExplainableAI" - framework: FRAMEWORK_PYTORCH - description: "Collection of ways to explain NN outputs" - } - repositories: { - url: "https://github.com/pwj1996/mycleverhans" - framework: FRAMEWORK_TENSORFLOW - description: "Modified cleverhans framework" - } - repositories: { - url: "https://github.com/SifatMd/Research-Papers" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/axelbrando/Mixture-Density-Networks-for-distribution-and-uncertainty-estimation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 260 - description: "A generic Mixture Density Networks (MDN) implementation for distribution and uncertainty estimation by using Keras (TensorFlow)" - } - repositories: { - url: "https://github.com/winycg/HCGNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 25 - description: "[AAAI-2020] Official implementations of HCGNets: Gated Convolutional Networks with Hybrid Connectivity for Image Classification" - } - } - video: { - video_id: "7hRO2bS810M" - video_title: "PR-038: Explaining and Harnessing Adversarial Examples" - number_of_likes: 7 - number_of_views: 1540 - published_date: { - seconds: 1507170279 - } - uploader: "Suh Kiho" - } - } -} -pr_id_to_video: { - key: 39 - value: { - papers: { - paper_id: "dropout-as-a-bayesian-approximation" - title: "Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning" - arxiv_id: "1506.02142" - abstract: "Deep learning tools have gained tremendous attention in applied machine\nlearning. However such tools for regression and classification do not capture\nmodel uncertainty. In comparison, Bayesian models offer a mathematically\ngrounded framework to reason about model uncertainty, but usually come with a\nprohibitive computational cost. In this paper we develop a new theoretical\nframework casting dropout training in deep neural networks (NNs) as approximate\nBayesian inference in deep Gaussian processes. 
A direct result of this theory\ngives us tools to model uncertainty with dropout NNs -- extracting information\nfrom existing models that has been thrown away so far. This mitigates the\nproblem of representing uncertainty in deep learning without sacrificing either\ncomputational complexity or test accuracy. We perform an extensive study of the\nproperties of dropout's uncertainty. Various network architectures and\nnon-linearities are assessed on tasks of regression and classification, using\nMNIST as an example. We show a considerable improvement in predictive\nlog-likelihood and RMSE compared to existing state-of-the-art methods, and\nfinish by using dropout's uncertainty in deep reinforcement learning." - pub_date: { - seconds: 1433548800 - } - authors: "Yarin Gal" - authors: "Zoubin Ghahramani" - repositories: { - url: "https://github.com/cdebeunne/uncertainties_CNN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "A repo for toy examples to test uncertainties estimation of neural networks" - } - repositories: { - url: "https://github.com/asharakeh/probdet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 29 - description: "Code for \"Estimating and Evaluating Regression Predictive Uncertainty in Deep Object Detectors.\" (ICLR 2021)" - } - repositories: { - url: "https://github.com/erickgalinkin/dropout_privacy" - framework: FRAMEWORK_TENSORFLOW - description: "Project repository for Drexel CS590 " - } - repositories: { - url: "https://github.com/MayarLotfy/bayesianNN" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/arodriguezca/uncertainty-ts-forecasting" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/aredier/monte_carlo_dropout" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "using monte carlo dropout to have uncertainty estimation of predictions" - } - repositories: { - url: "https://github.com/agnesdeng/misle" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Multiple imputation through statistical learning" - } - repositories: { - url: "https://github.com/gtegner/hyper-gan" - framework: FRAMEWORK_PYTORCH - description: "Uncertainty Estimation with HyperGANS in PyTorch!" - } - repositories: { - url: "https://github.com/marcovirgolin/UncertaintyEstimationInDeepNets" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Attempt to reproduce the toy experiment of http://bit.ly/2C9Z8St with an ensemble of nets and with dropout." - } - repositories: { - url: "https://github.com/jelleman8/TractSeg" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - methods: { - name: "Monte Carlo Dropout" - full_name: "Monte Carlo Dropout" - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." 
- } - } - video: { - video_id: "aU91bDGmy7I" - video_title: "PR-039: Dropout as a Bayesian approximation" - number_of_likes: 56 - number_of_views: 5082 - published_date: { - seconds: 1508076910 - } - uploader: "차준범" - } - } -} -pr_id_to_video: { - key: 40 - value: { - papers: { - paper_id: "wavenet-a-generative-model-for-raw-audio" - title: "WaveNet: A Generative Model for Raw Audio" - arxiv_id: "1609.03499" - abstract: "This paper introduces WaveNet, a deep neural network for generating raw audio\nwaveforms. The model is fully probabilistic and autoregressive, with the\npredictive distribution for each audio sample conditioned on all previous ones;\nnonetheless we show that it can be efficiently trained on data with tens of\nthousands of samples per second of audio. When applied to text-to-speech, it\nyields state-of-the-art performance, with human listeners rating it as\nsignificantly more natural sounding than the best parametric and concatenative\nsystems for both English and Mandarin. A single WaveNet can capture the\ncharacteristics of many different speakers with equal fidelity, and can switch\nbetween them by conditioning on the speaker identity. When trained to model\nmusic, we find that it generates novel and often highly realistic musical\nfragments. We also show that it can be employed as a discriminative model,\nreturning promising results for phoneme recognition." - pub_date: { - seconds: 1473638400 - } - authors: "Aaron van den Oord" - authors: "Sander Dieleman" - authors: "Heiga Zen" - authors: "Karen Simonyan" - authors: "Oriol Vinyals" - authors: "Alex Graves" - authors: "Nal Kalchbrenner" - authors: "Andrew Senior" - authors: "Koray Kavukcuoglu" - repositories: { - url: "https://github.com/pbrandl/aNN_Audio" - framework: FRAMEWORK_PYTORCH - description: "Digital twin of analog audio distortion devices (WavNet based)." - } - repositories: { - url: "https://github.com/ibab/tensorflow-wavenet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5127 - description: "A TensorFlow implementation of DeepMind's WaveNet paper" - } - repositories: { - url: "https://github.com/otosense/slang" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "A light weight version of Slang: Tools to build a language of sound." 
- } - repositories: { - url: "https://github.com/isadrtdinov/wavenet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "WaveNet vocoder implementation for speech synthesis task" - } - repositories: { - url: "https://github.com/AI-Huang/WaveNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Keras and PyTorch implementations for Google's WaveNet" - } - repositories: { - url: "https://github.com/stdereka/liverpool-ion-switching" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 13 - description: "Liverpool Ion Switching kaggle competition 2nd place winning solution" - } - repositories: { - url: "https://github.com/pascalbakker/WaveNet-Implementation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Tensorflow implementation of Wavenet architecture " - } - repositories: { - url: "https://github.com/randomrandom/deep-atrous-cnn-sentiment" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 65 - description: "Deep-Atrous-CNN-Text-Network: End-to-end word level model for sentiment analysis and other text classifications" - } - repositories: { - url: "https://github.com/sriharireddypusapati/speech-to-text-wavenet2" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/thorwhalen/slang" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "A light weight version of Slang: Tools to build a language of sound." - } - methods: { - name: "WaveNet" - full_name: "WaveNet" - description: "**WaveNet** is an audio generative model based on the [PixelCNN](https://paperswithcode.com/method/pixelcnn) architecture. In order to deal with long-range temporal dependencies needed for raw audio generation, architectures are developed based on dilated causal convolutions, which exhibit very large receptive fields.\r\n\r\nThe joint probability of a waveform $\\vec{x} = \\{ x_1, \\dots, x_T \\}$ is factorised as a product of conditional probabilities as follows:\r\n\r\n$$p\\left(\\vec{x}\\right) = \\prod_{t=1}^{T} p\\left(x_t \\mid x_1, \\dots ,x_{t-1}\\right)$$\r\n\r\nEach audio sample $x_t$ is therefore conditioned on the samples at all previous timesteps." - } - methods: { - name: "Dilated Causal Convolution" - full_name: "Dilated Causal Convolution" - description: "A **Dilated Causal Convolution** is a causal convolution where the filter is applied over an area larger than its length by skipping input values with a certain step. A dilated causal convolution effectively allows the network to have very large receptive fields with just a few layers." - } - methods: { - name: "Causal Convolution" - full_name: "Causal Convolution" - description: "**Causal convolutions** are a type of convolution used for temporal data which ensures the model cannot violate the ordering in which we model the data: the prediction $p(x_{t+1} | x_{1}, \\ldots, x_{t})$ emitted by the model at timestep $t$ cannot depend on any of the future timesteps $x_{t+1}, x_{t+2}, \\ldots, x_{T}$. For images, the equivalent of a causal convolution is a masked convolution which can be implemented by constructing a mask tensor and doing an element-wise multiplication of this mask with the convolution kernel before applying it. For 1-D data such as audio one can more easily implement this by shifting the output of a normal convolution by a few timesteps." 
- } - methods: { - name: "Mixture of Logistic Distributions" - full_name: "Mixture of Logistic Distributions" - description: "**Mixture of Logistic Distributions (MoL)** is a type of output function, and an alternative to a [softmax](https://paperswithcode.com/method/softmax) layer. Discretized logistic mixture likelihood is used in PixelCNN++ and [WaveNet](https://paperswithcode.com/method/wavenet) to predict discrete values.\r\n\r\nImage Credit: [Hao Gao](https://medium.com/@smallfishbigsea/an-explanation-of-discretized-logistic-mixture-likelihood-bdfe531751f0)" - } - } - video: { - video_id: "GyQnex_DK2k" - video_title: "PR-040: WaveNet - A Generative Model for Raw Audio" - number_of_likes: 63 - number_of_views: 7141 - published_date: { - seconds: 1508077701 - } - uploader: "Seungil Kim" - } - } -} -pr_id_to_video: { - key: 41 - value: { - papers: { - paper_id: "show-and-tell-a-neural-image-caption" - title: "Show and Tell: A Neural Image Caption Generator" - arxiv_id: "1411.4555" - abstract: "Automatically describing the content of an image is a fundamental problem in\nartificial intelligence that connects computer vision and natural language\nprocessing. In this paper, we present a generative model based on a deep\nrecurrent architecture that combines recent advances in computer vision and\nmachine translation and that can be used to generate natural sentences\ndescribing an image. The model is trained to maximize the likelihood of the\ntarget description sentence given the training image. Experiments on several\ndatasets show the accuracy of the model and the fluency of the language it\nlearns solely from image descriptions. Our model is often quite accurate, which\nwe verify both qualitatively and quantitatively. For instance, while the\ncurrent state-of-the-art BLEU-1 score (the higher the better) on the Pascal\ndataset is 25, our approach yields 59, to be compared to human performance\naround 69. We also show BLEU-1 score improvements on Flickr30k, from 56 to 66,\nand on SBU, from 19 to 28. Lastly, on the newly released COCO dataset, we\nachieve a BLEU-4 of 27.7, which is the current state-of-the-art." - pub_date: { - seconds: 1416182400 - } - authors: "Oriol Vinyals" - authors: "Alexander Toshev" - authors: "Samy Bengio" - authors: "Dumitru Erhan" - repositories: { - url: "https://github.com/supreethub/Image-Captioning" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "A complete pipeline of Convolutional Neural Networks (CNN) and Recurrent Neural Networks (RNN) knowledge to build a deep learning model that produces captions given an input image." - } - repositories: { - url: "https://github.com/jelifysh/Image-Captioning" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 16 - description: "Implemented 3 different architectures to tackle the Image Caption problem, i.e, Merged Encoder-Decoder - Bahdanau Attention - Transformers" - } - repositories: { - url: "https://github.com/juletx/image-caption-generation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Automatic Image Caption Generation model that uses a CNN to condition a LSTM based language model" - } - repositories: { - url: "https://github.com/Djmcflush/Quantum-Hackathon" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/sd2001/Image2Caption" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "🎥Image2Caption🔤: Upload an image and let the model generate a caption for you🤖." 
- } - repositories: { - url: "https://github.com/sd2001/Auto-Image2Caption" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "🎥Image2Caption🔤: Upload an image and let the model generate a caption for you🤖." - } - repositories: { - url: "https://github.com/Tamim-MR14/Image_Caption_Generator" - framework: FRAMEWORK_PYTORCH - description: "Project Done as a part of requirements of Graduation of Udacity computer Vision Nanodegree" - } - repositories: { - url: "https://github.com/simnyatsanga/image-caption-generator" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Image Caption Generators in TensorFlow and Keras" - } - repositories: { - url: "https://github.com/neerav47/Image-Captioning" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "Complete pipeline to predict captions for a given image." - } - repositories: { - url: "https://github.com/atharv6/Image-Captioning" - framework: FRAMEWORK_PYTORCH - description: "Generating Captions from Images" - } - } - video: { - video_id: "BrmCnoYhQb4" - video_title: "PR-041: Show and Tell: A Neural Image Caption Generator" - number_of_likes: 26 - number_of_views: 4493 - published_date: { - seconds: 1508678893 - } - uploader: "Jiyang Kang" - } - } -} -pr_id_to_video: { - key: 42 - value: { - papers: { - paper_id: "adam-a-method-for-stochastic-optimization" - title: "Adam: A Method for Stochastic Optimization" - arxiv_id: "1412.6980" - abstract: "We introduce Adam, an algorithm for first-order gradient-based optimization\nof stochastic objective functions, based on adaptive estimates of lower-order\nmoments. The method is straightforward to implement, is computationally\nefficient, has little memory requirements, is invariant to diagonal rescaling\nof the gradients, and is well suited for problems that are large in terms of\ndata and/or parameters. The method is also appropriate for non-stationary\nobjectives and problems with very noisy and/or sparse gradients. The\nhyper-parameters have intuitive interpretations and typically require little\ntuning. Some connections to related algorithms, on which Adam was inspired, are\ndiscussed. We also analyze the theoretical convergence properties of the\nalgorithm and provide a regret bound on the convergence rate that is comparable\nto the best known results under the online convex optimization framework.\nEmpirical results demonstrate that Adam works well in practice and compares\nfavorably to other stochastic optimization methods. Finally, we discuss AdaMax,\na variant of Adam based on the infinity norm." - pub_date: { - seconds: 1419206400 - } - authors: "Diederik P. Kingma" - authors: "Jimmy Ba" - repositories: { - url: "https://github.com/vanyle/vlearn" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "A machine learning framework written in C++ designed for distributed computing " - } - repositories: { - url: "https://github.com/joseluis1061/neuralnilm" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Neural nilm python 3.3" - } - repositories: { - url: "https://github.com/chuiyunjun/projectCSC413" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/nnaisense/pgpelib" - framework: FRAMEWORK_PYTORCH - number_of_stars: 22 - description: "A mini library for Policy Gradients with Parameter-based Exploration, with reference implementation of the ClipUp optimizer (https://arxiv.org/abs/2008.02387) from NNAISENSE." 
- } - repositories: { - url: "https://github.com/lab-ml/nn/tree/master/labml_nn/optimizers" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3053 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." - } - repositories: { - url: "https://github.com/offscale/cdd-python" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Open API to/fro routes, models, and tests. Convert between docstrings, classes, methods, argparse, and SQLalchemy." - } - repositories: { - url: "https://github.com/SamuelMarks/doctrans" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Open API to/fro routes, models, and tests. Convert between docstrings, classes, methods, argparse, and SQLalchemy." - } - repositories: { - url: "https://github.com/safakkbilici/Academic-Paper-Title-Recommendation" - framework: FRAMEWORK_OTHERS - number_of_stars: 12 - description: "Supervised text summarization (title generation/recommendation) based on academic paper abstracts, with Seq2Seq LSTM and the power of Transfer Learning and T5." - } - repositories: { - url: "https://github.com/JaneliaSciComp/SongExplorer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "deep learning for acoustic signals" - } - repositories: { - url: "https://github.com/zhuchen03/maxva" - framework: FRAMEWORK_PYTORCH - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "AdaMax" - full_name: "AdaMax" - description: "**AdaMax** is a generalisation of [Adam](https://paperswithcode.com/method/adam) from the $l\\_{2}$ norm to the $l\\_{\\infty}$ norm. Define:\r\n\r\n$$ u\\_{t} = \\beta^{\\infty}\\_{2}v\\_{t-1} + \\left(1-\\beta^{\\infty}\\_{2}\\right)|g\\_{t}|^{\\infty}$$\r\n\r\n$$ = \\max\\left(\\beta\\_{2}\\cdot{v}\\_{t-1}, |g\\_{t}|\\right)$$\r\n\r\nWe can plug into the Adam update equation by replacing $\\sqrt{\\hat{v}_{t} + \\epsilon}$ with $u\\_{t}$ to obtain the AdaMax update rule:\r\n\r\n$$ \\theta\\_{t+1} = \\theta\\_{t} - \\frac{\\eta}{u\\_{t}}\\hat{m}\\_{t} $$\r\n\r\nCommon default values are $\\eta = 0.002$ and $\\beta\\_{1}=0.9$ and $\\beta\\_{2}=0.999$." 
- } - } - video: { - video_id: "KN120w3PZIA" - video_title: "PR-042: Adam: A Method for Stochastic Optimization" - number_of_likes: 39 - number_of_views: 4165 - published_date: { - seconds: 1508682336 - } - uploader: "Ji-Hoon Kim" - } - } -} -pr_id_to_video: { - key: 43 - value: { - papers: { - paper_id: "hypernetworks" - title: "HyperNetworks" - arxiv_id: "1609.09106" - abstract: "This work explores hypernetworks: an approach of using a one network, also\nknown as a hypernetwork, to generate the weights for another network.\nHypernetworks provide an abstraction that is similar to what is found in\nnature: the relationship between a genotype - the hypernetwork - and a\nphenotype - the main network. Though they are also reminiscent of HyperNEAT in\nevolution, our hypernetworks are trained end-to-end with backpropagation and\nthus are usually faster. The focus of this work is to make hypernetworks useful\nfor deep convolutional networks and long recurrent networks, where\nhypernetworks can be viewed as relaxed form of weight-sharing across layers.\nOur main result is that hypernetworks can generate non-shared weights for LSTM\nand achieve near state-of-the-art results on a variety of sequence modelling\ntasks including character-level language modelling, handwriting generation and\nneural machine translation, challenging the weight-sharing paradigm for\nrecurrent networks. Our results also show that hypernetworks applied to\nconvolutional networks still achieve respectable results for image recognition\ntasks compared to state-of-the-art baseline models while requiring fewer\nlearnable parameters." - pub_date: { - seconds: 1474934400 - } - authors: "David Ha" - authors: "Andrew Dai" - authors: "Quoc V. Le" - repositories: { - is_official: true - url: "https://github.com/hardmaru/supercell" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 182 - description: "supercell" - } - repositories: { - url: "https://github.com/lab-ml/nn" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3055 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." - } - repositories: { - url: "https://github.com/gtegner/hyper-gan" - framework: FRAMEWORK_PYTORCH - description: "Uncertainty Estimation with HyperGANS in PyTorch!" - } - repositories: { - url: "https://github.com/gahaalt/continual-learning-with-hypernets" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - } - repositories: { - url: "https://github.com/g1910/HyperNetworks" - framework: FRAMEWORK_PYTORCH - number_of_stars: 113 - description: "PyTorch implementation of HyperNetworks (Ha et al., ICLR 2017) for ResNet (Residual Networks)" - } - repositories: { - url: "https://github.com/gahaalt/continual-learning-overview" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - } - methods: { - name: "HyperNetwork" - full_name: "HyperNetwork" - description: "A **HyperNetwork** is a network that generates a network for a main network. The behavior of the main network is the same with any usual neural network: it learns to map some raw inputs to their desired targets; whereas the hypernetwork takes a set of inputs that contain information about the structure of the weights and generates the weight for that layer." 
- } - } - video: { - video_id: "-tUQXSdEsMk" - video_title: "PR-043: HyperNetworks" - number_of_likes: 13 - number_of_views: 1681 - published_date: { - seconds: 1509287449 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 44 - value: { - papers: { - paper_id: "mobilenets-efficient-convolutional-neural" - title: "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" - arxiv_id: "1704.04861" - abstract: "We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization." - pub_date: { - seconds: 1492387200 - } - authors: "Andrew G. Howard" - authors: "Menglong Zhu" - authors: "Bo Chen" - authors: "Dmitry Kalenichenko" - authors: "Weijun Wang" - authors: "Tobias Weyand" - authors: "Marco Andreetto" - authors: "Hartwig Adam" - repositories: { - url: "https://github.com/prasadji/Flower-Classifaction-with-Fine-Tuned-Mobilenet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/osmr/imgclsmob" - framework: FRAMEWORK_OTHERS - number_of_stars: 2200 - description: "Sandbox for training deep learning networks" - } - repositories: { - url: "https://github.com/akrapukhin/MobileNetV3" - framework: FRAMEWORK_PYTORCH - description: "An implementation of the MobileNetV3 models in Pytorch with scripts for training, testing and measuring latency." - } - repositories: { - url: "https://github.com/rsreetech/MultiModalSearch" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "In this repository I demonstrate how you can perform multimodal(image+text) search to find similar images+texts given a test image+text from a multimodal (texts+images) database . I use the Kaggle Shopee dataset. I use Tensorflow MobileNet CNN and hugging face sentence transformers BERT to extract image and text embeddings to create a joint embedding search space. 
Given an image and its text description I extract joint embedding and then use nearest neighbours algorithm to find top 5 similar images+texts description from my joint embedding search space" - } - repositories: { - url: "https://github.com/Video-Streaming-Pipeline/Video-Streaming-Pipeline" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Development of a mobile and cloud video streaming pipeline for real-time image processing models" - } - repositories: { - url: "https://github.com/SalvadorAlbarran/TFG2020" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "AI acceleration on low-power devices" - } - repositories: { - url: "https://github.com/lpirola13/flower-recognizer" - framework: FRAMEWORK_TENSORFLOW - description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." - } - repositories: { - url: "https://github.com/Rishit-dagli/Greenathon-Plant-AI" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 23 - description: "Identify Diseases in Plants☘️ with Machine Learning on the web using TFJS" - } - repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/slim" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70334 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/jaykshirsagar05/CrowdCounting" - framework: FRAMEWORK_OTHERS - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Random Horizontal Flip" - full_name: "Random Horizontal Flip" - description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. 
Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "MobileNetV1" - full_name: "MobileNetV1" - description: "**MobileNet** is a type of convolutional neural network designed for mobile and embedded vision applications. They are based on a streamlined architecture that uses depthwise separable convolutions to build lightweight deep neural networks that can have low latency for mobile and embedded devices." - } - methods: { - name: "Depthwise Separable Convolution" - full_name: "Depthwise Separable Convolution" - description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" - } - methods: { - name: "Depthwise Convolution" - full_name: "Depthwise Convolution" - description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" - } - methods: { - name: "Pointwise Convolution" - full_name: "Pointwise Convolution" - description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. 
It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - } - video: { - video_id: "7UoOFKcyIvM" - video_title: "PR-044: MobileNet" - number_of_likes: 140 - number_of_views: 14766 - published_date: { - seconds: 1509456696 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 45 - value: { - papers: { - paper_id: "deeplab-semantic-image-segmentation-with-deep" - title: "DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs" - arxiv_id: "1606.00915" - abstract: "In this work we address the task of semantic image segmentation with Deep\nLearning and make three main contributions that are experimentally shown to\nhave substantial practical merit. First, we highlight convolution with\nupsampled filters, or 'atrous convolution', as a powerful tool in dense\nprediction tasks. Atrous convolution allows us to explicitly control the\nresolution at which feature responses are computed within Deep Convolutional\nNeural Networks. It also allows us to effectively enlarge the field of view of\nfilters to incorporate larger context without increasing the number of\nparameters or the amount of computation. Second, we propose atrous spatial\npyramid pooling (ASPP) to robustly segment objects at multiple scales. ASPP\nprobes an incoming convolutional feature layer with filters at multiple\nsampling rates and effective fields-of-views, thus capturing objects as well as\nimage context at multiple scales. Third, we improve the localization of object\nboundaries by combining methods from DCNNs and probabilistic graphical models.\nThe commonly deployed combination of max-pooling and downsampling in DCNNs\nachieves invariance but has a toll on localization accuracy. We overcome this\nby combining the responses at the final DCNN layer with a fully connected\nConditional Random Field (CRF), which is shown both qualitatively and\nquantitatively to improve localization performance. 
Our proposed \"DeepLab\"\nsystem sets the new state-of-art at the PASCAL VOC-2012 semantic image\nsegmentation task, reaching 79.7% mIOU in the test set, and advances the\nresults on three other datasets: PASCAL-Context, PASCAL-Person-Part, and\nCityscapes. All of our code is made publicly available online." - pub_date: { - seconds: 1464825600 - } - authors: "Liang-Chieh Chen" - authors: "George Papandreou" - authors: "Iasonas Kokkinos" - authors: "Kevin Murphy" - authors: "Alan L. Yuille" - repositories: { - url: "https://github.com/johnnylu305/Simple-does-it-weakly-supervised-instance-and-semantic-segmentation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 43 - description: "Weakly Supervised Segmentation by Tensorflow. Implements semantic segmentation in Simple Does It: Weakly Supervised Instance and Semantic Segmentation, by Khoreva et al. (CVPR 2017)." - } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1/configs/deeplabv3" - framework: FRAMEWORK_OTHERS - number_of_stars: 1477 - description: "End-to-end image segmentation kit based on PaddlePaddle. " - } - repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/deeplab" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70334 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/NASA-NeMO-Net/NeMO-Net" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/CompVis/taming-transformers" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1435 - } - repositories: { - url: "https://github.com/leimao/DeepLab-V3" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 78 - description: "Google DeepLab V3 for Image Semantic Segmentation" - } - repositories: { - url: "https://github.com/kuangbixia/DeepLab" - framework: FRAMEWORK_PYTORCH - description: "Backup the source codes I learned and modified." - } - repositories: { - url: "https://github.com/Media-Smart/vedaseg" - framework: FRAMEWORK_PYTORCH - number_of_stars: 382 - description: "A semantic segmentation toolbox based on PyTorch" - } - repositories: { - url: "https://github.com/Popcorn-sugar/Deep_v2" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/Qengineering/TensorFlow_Lite_Segmentation_RPi_32-bit" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "TensorFlow Lite segmentation on Raspberry Pi 4 aka Unet at 4.2 FPS" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. 
They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. 
It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Spatial Pyramid Pooling" - full_name: "Spatial Pyramid Pooling" - description: "** Spatial Pyramid Pooling (SPP)** is a pooling layer that removes the fixed-size constraint of the network, i.e. a CNN does not require a fixed-size input image. Specifically, we add an SPP layer on top of the last convolutional layer. The SPP layer pools the features and generates fixed-length outputs, which are then fed into the fully-connected layers (or other classifiers). In other words, we perform some information aggregation at a deeper stage of the network hierarchy (between convolutional layers and fully-connected layers) to avoid the need for cropping or warping at the beginning." - } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - } - video: { - video_id: "JiC78rUF4iI" - video_title: "PR-045: DeepLab: Semantic Image Segmentation" - number_of_likes: 93 - number_of_views: 8710 - published_date: { - seconds: 1509896571 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 46 - value: { - papers: { - paper_id: "deep-knowledge-tracing" - title: "Deep Knowledge Tracing" - arxiv_id: "1506.05908" - abstract: "Knowledge tracing---where a machine models the knowledge of a student as they\ninteract with coursework---is a well established problem in computer supported\neducation. 
Though effectively modeling student knowledge would have high\neducational impact, the task has many inherent challenges. In this paper we\nexplore the utility of using Recurrent Neural Networks (RNNs) to model student\nlearning. The RNN family of models have important advantages over previous\nmethods in that they do not require the explicit encoding of human domain\nknowledge, and can capture more complex representations of student knowledge.\nUsing neural networks results in substantial improvements in prediction\nperformance on a range of knowledge tracing datasets. Moreover the learned\nmodel can be used for intelligent curriculum design and allows straightforward\ninterpretation and discovery of structure in student tasks. These results\nsuggest a promising new line of research for knowledge tracing and an exemplary\napplication task for RNNs." - pub_date: { - seconds: 1434672000 - } - authors: "Chris Piech" - authors: "Jonathan Spencer" - authors: "Jonathan Huang" - authors: "Surya Ganguli" - authors: "Mehran Sahami" - authors: "Leonidas Guibas" - authors: "Jascha Sohl-Dickstein" - repositories: { - url: "https://github.com/YangZhouEdu/DKT_pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Deep Knowledge Tracing by Pytorch" - } - repositories: { - url: "https://github.com/jdxyw/deepKT" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "A repo for knowledge tracing implementation by PyTorch" - } - repositories: { - is_official: true - url: "https://github.com/chrispiech/DeepKnowledgeTracing" - framework: FRAMEWORK_OTHERS - number_of_stars: 183 - description: "source code for the paper Deep Knowledge Tracing" - } - repositories: { - url: "https://github.com/jarviszhb/KnowledgeTracing" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9 - description: "Some implementations of knowledge tracing with pytorch" - } - methods: { - name: "LINE" - full_name: "Large-scale Information Network Embedding" - description: "LINE is a novel network embedding method which is suitable for arbitrary types of information networks: undirected, directed, and/or weighted. The method optimizes a carefully designed objective function that preserves both the local and global network structures.\r\n\r\nSource: [Tang et al.](https://arxiv.org/pdf/1503.03578v1.pdf)\r\n\r\nImage source: [Tang et al.](https://arxiv.org/pdf/1503.03578v1.pdf)" - } - } - video: { - video_id: "8hdY6Jns5-k" - video_title: "PR-046: Deep Knowledge Tracing" - number_of_views: 2013 - published_date: { - seconds: 1509893052 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 47 - value: { - papers: { - paper_id: "learning-deep-features-for-discriminative" - title: "Learning Deep Features for Discriminative Localization" - arxiv_id: "1512.04150" - abstract: "In this work, we revisit the global average pooling layer proposed in [13],\nand shed light on how it explicitly enables the convolutional neural network to\nhave remarkable localization ability despite being trained on image-level\nlabels. While this technique was previously proposed as a means for\nregularizing training, we find that it actually builds a generic localizable\ndeep representation that can be applied to a variety of tasks. Despite the\napparent simplicity of global average pooling, we are able to achieve 37.1%\ntop-5 error for object localization on ILSVRC 2014, which is remarkably close\nto the 34.2% top-5 error achieved by a fully supervised CNN approach. 
We\ndemonstrate that our network is able to localize the discriminative image\nregions on a variety of tasks despite not being trained for them" - pub_date: { - seconds: 1450051200 - } - authors: "Bolei Zhou" - authors: "Aditya Khosla" - authors: "Agata Lapedriza" - authors: "Aude Oliva" - authors: "Antonio Torralba" - repositories: { - url: "https://github.com/zhoubolei/CAM" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1500 - description: "Class Activation Mapping" - } - repositories: { - url: "https://github.com/FrancescoSaverioZuppichini/A-journey-into-Convolutional-Neural-Network-visualization-" - framework: FRAMEWORK_PYTORCH - number_of_stars: 184 - description: "A journey into Convolutional Neural Network visualization " - } - repositories: { - url: "https://github.com/frgfm/torch-cam" - framework: FRAMEWORK_PYTORCH - number_of_stars: 337 - description: "Class activation maps for your PyTorch models (CAM, Grad-CAM, Grad-CAM++, Smooth Grad-CAM++, Score-CAM, SS-CAM, IS-CAM, XGrad-CAM)" - } - repositories: { - url: "https://github.com/HRanWang/Spatial-Re-Scaling" - framework: FRAMEWORK_PYTORCH - number_of_stars: 129 - } - repositories: { - url: "https://github.com/vlue-c/PyTorch-Explanations" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/jsr66/Machine-Learning-Phases-of-Matter-with-Discriminative-Localization-" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/Seb-Good/deep_ecg" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 53 - description: "A library for classifying single-lead ECG waveforms as either Normal Sinus Rhythm, Atrial Fibrillation, or Other Rhythm." - } - repositories: { - url: "https://github.com/Tetsuya-Nishikawa/CAM" - framework: FRAMEWORK_TENSORFLOW - description: "Experiments with CAM (class activation map) (https://arxiv.org/pdf/1512.04150.pdf)" - } - repositories: { - url: "https://github.com/FelixFu520/CAM-Cifar10" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/metalbubble/CAM" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1500 - description: "Class Activation Mapping" - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. 
\r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - } - video: { - video_id: "-Z1NIzLxgRU" - video_title: "PR047: Learning Deep Features for Discriminative Localization" - number_of_likes: 31 - number_of_views: 2216 - published_date: { - seconds: 1510500873 - } - uploader: "이광희" - } - } -} -pr_id_to_video: { - key: 48 - value: { - papers: { - paper_id: "towards-principled-methods-for-training" - title: "Towards Principled Methods for Training Generative Adversarial Networks" - arxiv_id: "1701.04862" - abstract: "The goal of this paper is not to introduce a single algorithm or method, but\nto make theoretical steps towards fully understanding the training dynamics of\ngenerative adversarial networks. In order to substantiate our theoretical\nanalysis, we perform targeted experiments to verify our assumptions, illustrate\nour claims, and quantify the phenomena. This paper is divided into three\nsections. The first section introduces the problem at hand. The second section\nis dedicated to studying and proving rigorously the problems including\ninstability and saturation that arize when training generative adversarial\nnetworks. The third section examines a practical and theoretically grounded\ndirection towards solving these problems, while introducing new tools to study\nthem." - pub_date: { - seconds: 1484611200 - } - authors: "Martin Arjovsky" - authors: "Léon Bottou" - repositories: { - url: "https://github.com/voqtuyen/GAN-Intuition" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - } - video: { - video_id: "RlAgB0Ooxaw" - video_title: "PR-048: Towards Principled Methods for Training Generative Adversarial Networks" - number_of_likes: 19 - number_of_views: 1704 - published_date: { - seconds: 1510652207 - } - uploader: "Ji-Hoon Kim" - } - } -} -pr_id_to_video: { - key: 49 - value: { - papers: { - paper_id: "attention-is-all-you-need" - title: "Attention Is All You Need" - arxiv_id: "1706.03762" - abstract: "The dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks in an encoder-decoder configuration. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer, based\nsolely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to be\nsuperior in quality while being more parallelizable and requiring significantly\nless time to train. Our model achieves 28.4 BLEU on the WMT 2014\nEnglish-to-German translation task, improving over the existing best results,\nincluding ensembles by over 2 BLEU. On the WMT 2014 English-to-French\ntranslation task, our model establishes a new single-model state-of-the-art\nBLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction\nof the training costs of the best models from the literature. 
We show that the\nTransformer generalizes well to other tasks by applying it successfully to\nEnglish constituency parsing both with large and limited training data." - pub_date: { - seconds: 1497225600 - } - authors: "Ashish Vaswani" - authors: "Noam Shazeer" - authors: "Niki Parmar" - authors: "Jakob Uszkoreit" - authors: "Llion Jones" - authors: "Aidan N. Gomez" - authors: "Lukasz Kaiser" - authors: "Illia Polosukhin" - repositories: { - url: "https://github.com/bangoc123/transformer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "Build English-Vietnamese machine translation with ProtonX Transformer. :D" - } - repositories: { - url: "https://github.com/brainsqueeze/text2vec" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "Contextual embedding for text blobs." - } - repositories: { - url: "https://github.com/maroxtn/tun-sentiment" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "My solution in Zindi Tunisian Sentiment Analysis competition. Ranked #1st." - } - repositories: { - url: "https://github.com/han-shi/SparseBERT" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - } - repositories: { - url: "https://github.com/rupakdas18/SuperGlue-tasks-using-BERT" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "In this project we have implemented 2 SuperGlue tasks (RTE and BOOLQ)." - } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/machine_translation/transformer" - framework: FRAMEWORK_OTHERS - number_of_stars: 1363 - description: "An NLP library with Awesome pre-trained Transformer models and easy-to-use interface, supporting wide-range of NLP tasks from research to industrial applications." - } - repositories: { - url: "https://github.com/mitran27/Attention-is-all-you-Need" - framework: FRAMEWORK_OTHERS - description: "building the Transformer (new world of NLP) completely from scratch" - } - repositories: { - url: "https://github.com/xmu-xiaoma666/External-Attention-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 479 - description: "Pytorch implementation of various Attention Mechanism" - } - repositories: { - url: "https://github.com/stevinc/Transformer_Timeseries" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "Pytorch code for Google's Temporal Fusion Transformer" - } - repositories: { - url: "https://github.com/xydaytoy/BMI-NMT" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." 
- } - methods: { - name: "Layer Normalization" - full_name: "Layer Normalization" - description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Label Smoothing" - full_name: "Label Smoothing" - description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" - } - methods: { - name: "Transformer" - full_name: "Transformer" - description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. 
The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." - } - methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. 
\r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - } - video: { - video_id: "6zGgVIlStXs" - video_title: "PR-049: Attention is All You Need" - number_of_views: 7229 - published_date: { - seconds: 1512304902 - } - uploader: "Suh Kiho" - } - } -} -pr_id_to_video: { - key: 50 - value: { - papers: { - paper_id: "convolutional-lstm-network-a-machine-learning" - title: "Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting" - arxiv_id: "1506.04214" - abstract: "The goal of precipitation nowcasting is to predict the future rainfall\nintensity in a local region over a relatively short period of time. Very few\nprevious studies have examined this crucial and challenging weather forecasting\nproblem from the machine learning perspective. In this paper, we formulate\nprecipitation nowcasting as a spatiotemporal sequence forecasting problem in\nwhich both the input and the prediction target are spatiotemporal sequences. By\nextending the fully connected LSTM (FC-LSTM) to have convolutional structures\nin both the input-to-state and state-to-state transitions, we propose the\nconvolutional LSTM (ConvLSTM) and use it to build an end-to-end trainable model\nfor the precipitation nowcasting problem. Experiments show that our ConvLSTM\nnetwork captures spatiotemporal correlations better and consistently\noutperforms FC-LSTM and the state-of-the-art operational ROVER algorithm for\nprecipitation nowcasting."
- pub_date: { - seconds: 1434153600 - } - authors: "Xingjian Shi" - authors: "Zhourong Chen" - authors: "Hao Wang" - authors: "Dit-Yan Yeung" - authors: "Wai-kin Wong" - authors: "Wang-chun Woo" - repositories: { - url: "https://github.com/czifan/ConvLSTM.pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 25 - } - repositories: { - url: "https://github.com/Tetsuya-Nishikawa/ConvLSTM_DEMO" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "自作ConvLSTMデモ" - } - repositories: { - url: "https://github.com/rogertrullo/pytorch_convlstm" - framework: FRAMEWORK_PYTORCH - number_of_stars: 126 - description: "convolutional lstm implementation in pytorch" - } - repositories: { - url: "https://github.com/trichtu/ConvLSTM-RAU-net" - framework: FRAMEWORK_PYTORCH - number_of_stars: 18 - description: "Spatial-temperal Prediction Model based on history observation and WRF numerical prediction " - } - repositories: { - url: "https://github.com/ndrplz/ConvLSTM_pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 829 - description: "Implementation of Convolutional LSTM in PyTorch." - } - repositories: { - url: "https://github.com/automan000/Convolution_LSTM_pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 675 - description: "Multi-layer convolutional LSTM with Pytorch" - } - methods: { - name: "Tanh Activation" - full_name: "Tanh Activation" - description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "LSTM" - full_name: "Long Short-Term Memory" - description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. 
Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" - } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - methods: { - name: "ConvLSTM" - full_name: "ConvLSTM" - description: "**ConvLSTM** is a type of recurrent neural network for spatio-temporal prediction that has convolutional structures in both the input-to-state and state-to-state transitions. The ConvLSTM determines the future state of a certain cell in the grid by the inputs and past states of its local neighbors. This can easily be achieved by using a convolution operator in the state-to-state and input-to-state transitions (see Figure). The key equations of ConvLSTM are shown below, where $∗$ denotes the convolution operator and $\\odot$ the Hadamard product:\r\n\r\n$$ i\\_{t} = \\sigma\\left(W\\_{xi} ∗ X\\_{t} + W\\_{hi} ∗ H\\_{t−1} + W\\_{ci} \\odot \\mathcal{C}\\_{t−1} + b\\_{i}\\right) $$\r\n\r\n$$ f\\_{t} = \\sigma\\left(W\\_{xf} ∗ X\\_{t} + W\\_{hf} ∗ H\\_{t−1} + W\\_{cf} \\odot \\mathcal{C}\\_{t−1} + b\\_{f}\\right) $$\r\n\r\n$$ \\mathcal{C}\\_{t} = f\\_{t} \\odot \\mathcal{C}\\_{t−1} + i\\_{t} \\odot \\text{tanh}\\left(W\\_{xc} ∗ X\\_{t} + W\\_{hc} ∗ \\mathcal{H}\\_{t−1} + b\\_{c}\\right) $$\r\n\r\n$$ o\\_{t} = \\sigma\\left(W\\_{xo} ∗ X\\_{t} + W\\_{ho} ∗ \\mathcal{H}\\_{t−1} + W\\_{co} \\odot \\mathcal{C}\\_{t} + b\\_{o}\\right) $$\r\n\r\n$$ \\mathcal{H}\\_{t} = o\\_{t} \\odot \\text{tanh}\\left(C\\_{t}\\right) $$\r\n\r\nIf we view the states as the hidden representations of moving objects, a ConvLSTM with a larger transitional kernel should be able to capture faster motions while one with a smaller kernel can capture slower motions. \r\n\r\nTo ensure that the states have the same number of rows and same number of columns as the inputs, padding is needed before applying the convolution operation. Here, padding of the hidden states on the boundary points can be viewed as using the state of the outside world for calculation. Usually, before the first input comes, we initialize all the states of the LSTM to zero which corresponds to \"total ignorance\" of the future." - } - } - video: { - video_id: "3cFfCM4CXws" - video_title: "PR-050: Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting" - number_of_likes: 38 - number_of_views: 6945 - published_date: { - seconds: 1511707163 - } - uploader: "Taegyun Jeon" - } - } -} -pr_id_to_video: { - key: 51 - value: { - papers: { - paper_id: "conditional-generative-adversarial-nets" - title: "Conditional Generative Adversarial Nets" - arxiv_id: "1411.1784" - abstract: "Generative Adversarial Nets [8] were recently introduced as a novel way to train generative models. 
In this work we introduce the conditional version of generative adversarial nets, which can be constructed by simply feeding the data, y, we wish to condition on to both the generator and discriminator. We show that this model can generate MNIST digits conditioned on class labels. We also illustrate how this model could be used to learn a multi-modal model, and provide preliminary examples of an application to image tagging in which we demonstrate how this approach can generate descriptive tags which are not part of training labels." - pub_date: { - seconds: 1415232000 - } - authors: "Mehdi Mirza" - authors: "Simon Osindero" - repositories: { - url: "https://github.com/asiltureli/gan-in-colab" - framework: FRAMEWORK_PYTORCH - description: "GAN implementations on Google Colab" - } - repositories: { - url: "https://github.com/AshishSingh2261/GAN" - framework: FRAMEWORK_OTHERS - description: "Contains code for different types of GANs trained on different datasets." - } - repositories: { - url: "https://github.com/YigitGunduc/Conditional-GANs-CGANs" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Conditional Generative Adversarial Networks(cgans) to convert text to image implemented in Python and TensorFlow & Keras" - } - repositories: { - url: "https://github.com/kynk94/TF2-Image-Generation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 9 - description: "Tensorflow2 reimplementation of image generation model (GAN, Style Transfer, Image to Image Translation, etc)" - } - repositories: { - url: "https://github.com/otepencelik/GAN-Artwork-Generation" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - } - repositories: { - url: "https://github.com/MCLYang/RhythmGAN_pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "The pytorch implementation for RhythmGAN" - } - repositories: { - url: "https://github.com/Lornatang/CGAN-PyTorch" - framework: FRAMEWORK_PYTORCH - description: "Simple implementation of conditional general adverse nets in pytorch machine learning framework" - } - repositories: { - url: "https://github.com/jamesloyys/PyTorch-Lightning-GAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - description: "Implementations of various GAN architectures using PyTorch Lightning" - } - repositories: { - url: "https://github.com/gordicaleksa/pytorch-gans" - framework: FRAMEWORK_PYTORCH - number_of_stars: 287 - description: "My implementation of various GAN (generative adversarial networks) architectures like vanilla GAN (Goodfellow et al.), cGAN (Mirza et al.), DCGAN (Radford et al.), etc." - } - repositories: { - url: "https://github.com/alles9fresser/Conditional-GAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - } - } - video: { - video_id: "iCgT8G4PkqI" - video_title: "PR-051: Conditional Generative Adversarial Nets" - number_of_likes: 24 - number_of_views: 3429 - published_date: { - seconds: 1512310569 - } - uploader: "Seungil Kim" - } - } -} -pr_id_to_video: { - key: 52 - value: { - papers: { - paper_id: "multiplayer-alphazero" - title: "Multiplayer AlphaZero" - arxiv_id: "1910.13012" - abstract: "The AlphaZero algorithm has achieved superhuman performance in two-player, deterministic, zero-sum games where perfect information of the game state is available. This success has been demonstrated in Chess, Shogi, and Go where learning occurs solely through self-play. 
Many real-world applications (e.g., equity trading) require the consideration of a multiplayer environment. In this work, we suggest novel modifications of the AlphaZero algorithm to support multiplayer environments, and evaluate the approach in two simple 3-player games. Our experiments show that multiplayer AlphaZero learns successfully and consistently outperforms a competing approach: Monte Carlo tree search. These results suggest that our modified AlphaZero can learn effective strategies in multiplayer game scenarios. Our work supports the use of AlphaZero in multiplayer games and suggests future research for more complex environments." - pub_date: { - seconds: 1572307200 - } - authors: "Nick Petosa" - authors: "Tucker Balch" - repositories: { - is_official: true - url: "https://github.com/petosa/multiplayer-alphazero" - framework: FRAMEWORK_PYTORCH - number_of_stars: 18 - description: "PyTorch AlphaZero implementation with multiplayer support [NeurIPS 2019 Deep Reinforcement Learning Workshop]" - } - methods: { - name: "AlphaZero" - full_name: "AlphaZero" - description: "**AlphaZero** is a reinforcement learning agent for playing board games such as Go, chess, and shogi. " - } - } - video: {} - } -} -pr_id_to_video: { - key: 53 - value: { - papers: { - paper_id: "grad-cam-visual-explanations-from-deep" - title: "Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization" - arxiv_id: "1610.02391" - abstract: "We propose a technique for producing \"visual explanations\" for decisions from a large class of CNN-based models, making them more transparent. Our approach - Gradient-weighted Class Activation Mapping (Grad-CAM), uses the gradients of any target concept, flowing into the final convolutional layer to produce a coarse localization map highlighting important regions in the image for predicting the concept. Grad-CAM is applicable to a wide variety of CNN model-families: (1) CNNs with fully-connected layers, (2) CNNs used for structured outputs, (3) CNNs used in tasks with multimodal inputs or reinforcement learning, without any architectural changes or re-training. We combine Grad-CAM with fine-grained visualizations to create a high-resolution class-discriminative visualization and apply it to off-the-shelf image classification, captioning, and visual question answering (VQA) models, including ResNet-based architectures. In the context of image classification models, our visualizations (a) lend insights into their failure modes, (b) are robust to adversarial images, (c) outperform previous methods on localization, (d) are more faithful to the underlying model and (e) help achieve generalization by identifying dataset bias. For captioning and VQA, we show that even non-attention based models can localize inputs. We devise a way to identify important neurons through Grad-CAM and combine it with neuron names to provide textual explanations for model decisions. Finally, we design and conduct human studies to measure if Grad-CAM helps users establish appropriate trust in predictions from models and show that Grad-CAM helps untrained users successfully discern a 'stronger' model from a 'weaker' one even when both make identical predictions. Our code is available at https://github.com/ramprs/grad-cam/, along with a demo at http://gradcam.cloudcv.org, and a video at youtu.be/COjUB9Izk6E." - pub_date: { - seconds: 1475798400 - } - authors: "Ramprasaath R. 
Selvaraju" - authors: "Michael Cogswell" - authors: "Abhishek Das" - authors: "Ramakrishna Vedantam" - authors: "Devi Parikh" - authors: "Dhruv Batra" - repositories: { - url: "https://github.com/CMU-CREATE-Lab/deep-smoke-machine" - framework: FRAMEWORK_PYTORCH - number_of_stars: 62 - description: "Deep learning models and dataset for recognizing industrial smoke emissions" - } - repositories: { - url: "https://github.com/novice03/timm-vis" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "Visualizer for PyTorch image models" - } - repositories: { - url: "https://github.com/sauravmishra1710/EXPLAINABLE-AI---Skin-Cancer-Detection-explained-with-GRADCAM" - framework: FRAMEWORK_TENSORFLOW - description: "Diagnose the presence of skin cancer in a person using CNN and as well explain what led the CNN to arrive at the decision. Visual explanations are made utilizing the Gradient-weighted Class Activation Mapping (Grad-CAM), the gradients flowing into the final convolutional layer to produce a coarse localization map highlighting the important regions in the image for considered for arriving at the decision. The original paper for GRADCAM can be found @ https://arxiv.org/abs/1610.02391" - } - repositories: { - url: "https://github.com/xn1997/pytorch-grad-cam" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "特征图可视化(个人修改版)" - } - repositories: { - url: "https://github.com/priyavrat-misra/xrays-and-gradcam" - framework: FRAMEWORK_PYTORCH - number_of_stars: 14 - description: "Classification and Gradient-based Localization of Chest Radiographs using PyTorch." - } - repositories: { - url: "https://github.com/jordan-bird/synthetic-fruit-image-generator" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Use a CGAN to generate synthetic images of healthy and unhealthy lemons" - } - repositories: { - url: "https://github.com/FrancescoSaverioZuppichini/A-journey-into-Convolutional-Neural-Network-visualization-" - framework: FRAMEWORK_PYTORCH - number_of_stars: 184 - description: "A journey into Convolutional Neural Network visualization " - } - repositories: { - url: "https://github.com/samson6460/tf_keras_gradcamplusplus" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 - description: "tensorflow.keras implementation of gradcam and gradcam++" - } - repositories: { - url: "https://github.com/dtanoglidis/DeepShadows" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 - description: "Repository for the project \"DeepShadows: Separating LSBGs from artifacts using Deep Learning\"" - } - repositories: { - url: "https://github.com/ayulockin/interpretabilitycnn" - framework: FRAMEWORK_OTHERS - number_of_stars: 7 - description: "Custom Keras Callbacks for Feature Visualization, Class Activation Map, Grad-CAM" - } - } - video: { - video_id: "faGsrPX1yFM" - video_title: "PR-053: Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization" - number_of_likes: 34 - number_of_views: 6460 - published_date: { - seconds: 1512915707 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 54 - value: { - papers: { - paper_id: "shufflenet-an-extremely-efficient" - title: "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" - arxiv_id: "1707.01083" - abstract: "We introduce an extremely computation-efficient CNN architecture named\nShuffleNet, which is designed specially for mobile devices with very limited\ncomputing power 
(e.g., 10-150 MFLOPs). The new architecture utilizes two new\noperations, pointwise group convolution and channel shuffle, to greatly reduce\ncomputation cost while maintaining accuracy. Experiments on ImageNet\nclassification and MS COCO object detection demonstrate the superior\nperformance of ShuffleNet over other structures, e.g. lower top-1 error\n(absolute 7.8%) than recent MobileNet on ImageNet classification task, under\nthe computation budget of 40 MFLOPs. On an ARM-based mobile device, ShuffleNet\nachieves ~13x actual speedup over AlexNet while maintaining comparable\naccuracy." - pub_date: { - seconds: 1499126400 - } - authors: "Xiangyu Zhang" - authors: "Xinyu Zhou" - authors: "Mengxiao Lin" - authors: "Jian Sun" - repositories: { - url: "https://github.com/tensorpack/tensorpack/tree/master/examples/ImageNetModels" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6034 - description: "A Neural Net Training Interface on TensorFlow, with focus on speed + flexibility" - } - repositories: { - url: "https://github.com/afzalahmad0203/Tensorflow-Shufflenet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Shufflenet implementation in tensorflow based on https://arxiv.org/abs/1707.01083" - } - repositories: { - url: "https://github.com/osmr/imgclsmob" - framework: FRAMEWORK_OTHERS - number_of_stars: 2200 - description: "Sandbox for training deep learning networks" - } - repositories: { - url: "https://github.com/eogussla12/Shufflenet_CIFAR10_Pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Pytorch-Shufflenet-CIFAR10" - } - repositories: { - url: "https://github.com/MrRen-sdhm/Embedded_Multi_Object_Detection_CNN" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "Multi-object detection by lightweight CNN on embedded system" - } - repositories: { - url: "https://github.com/alalagong/LEDNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Small changed LEDNet" - } - repositories: { - url: "https://github.com/clavichord93/MENet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11 - description: "This repo contains code for *Merging and Evolution: Improving Convolutional Neural Networks for Mobile Applications*." - } - repositories: { - url: "https://github.com/europa1610/Tensorflow-Shufflenet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Shufflenet implementation in tensorflow based on https://arxiv.org/abs/1707.01083" - } - repositories: { - url: "https://github.com/afzalahmad0203/Numpy-Shufflenet" - framework: FRAMEWORK_OTHERS - description: "Numpy implementation of shufflenet based on https://arxiv.org/abs/1707.01083" - } - repositories: { - url: "https://github.com/minhto2802/keras-shufflenet" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. 
It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "RPN" - full_name: "Region Proposal Network" - description: "A **Region Proposal Network**, or **RPN**, is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals. RPN and algorithms like Fast R-CNN can be merged into a single network by sharing their convolutional features - using the recently popular terminology of neural networks with attention mechanisms, the RPN component tells the unified network where to look.\r\n\r\nRPNs are designed to efficiently predict region proposals with a wide range of scales and aspect ratios. RPNs use anchor boxes that serve as references at multiple scales and aspect ratios. The scheme can be thought of as a pyramid of regression references, which avoids enumerating images or filters of multiple scales or aspect ratios." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "AlexNet" - full_name: "AlexNet" - description: "**AlexNet** is a classic convolutional neural network architecture. It consists of convolutions, max pooling and dense layers as the basic building blocks. Grouped convolutions are used in order to fit the model across two GPUs." - } - methods: { - name: "Random Horizontal Flip" - full_name: "Random Horizontal Flip" - description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. 
Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "RoIPool" - full_name: "RoIPool" - description: "**Region of Interest Pooling**, or **RoIPool**, is an operation for extracting a small feature map (e.g., $7×7$) from each RoI in detection and segmentation based tasks. Features are extracted from each candidate box, and thereafter in models like Fast R-CNN, are then classified and bounding box regression performed.\r\n\r\nThe actual scaling to, e.g., $7×7$, occurs by dividing the region proposal into equally sized sections, finding the largest value in each section, and then copying these max values to the output buffer. In essence, **RoIPool** is max pooling on a discrete grid based on a box.\r\n\r\nImage Source: [Joyce Xu](https://towardsdatascience.com/deep-learning-for-object-detection-a-comprehensive-review-73930816d8d9)" - } - methods: { - name: "Channel Shuffle" - full_name: "Channel Shuffle" - description: "**Channel Shuffle** is an operation to help information flow across feature channels in convolutional neural networks. It was used as part of the [ShuffleNet](https://paperswithcode.com/method/shufflenet) architecture. \r\n\r\nIf we allow a group convolution to obtain input data from different groups, the input and output channels will be fully related. Specifically, for the feature map generated from the previous group layer, we can first divide the channels in each group into several subgroups, then feed each group in the next layer with different subgroups. \r\n\r\nThe above can be efficiently and elegantly implemented by a channel shuffle operation: suppose a convolutional layer with $g$ groups whose output has $g \\times n$ channels; we first reshape the output channel dimension into $\\left(g, n\\right)$, transposing and then flattening it back as the input of next layer. Channel shuffle is also differentiable, which means it can be embedded into network structures for end-to-end training." - } - methods: { - name: "Faster R-CNN" - full_name: "Faster R-CNN" - description: "**Faster R-CNN** is an object detection model that improves on [Fast R-CNN](https://paperswithcode.com/method/fast-r-cnn) by utilising a region proposal network ([RPN](https://paperswithcode.com/method/rpn)) with the CNN model. The RPN shares full-image convolutional features with the detection network, enabling nearly cost-free region proposals. It is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals, which are used by [Fast R-CNN](https://paperswithcode.com/method/fast-r-cnn) for detection. RPN and Fast R-CNN are merged into a single network by sharing their convolutional features: the RPN component tells the unified network where to look.\r\n\r\nAs a whole, Faster R-CNN consists of two modules. The first module is a deep fully convolutional network that proposes regions, and the second module is the Fast R-CNN detector that uses the proposed regions." - } - methods: { - name: "Depthwise Convolution" - full_name: "Depthwise Convolution" - description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. 
In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" - } - } - video: { - video_id: "pNuBdj53Hbc" - video_title: "PR-054: ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" - number_of_likes: 52 - number_of_views: 6167 - published_date: { - seconds: 1513005030 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 55 - value: { - papers: { - paper_id: "neural-machine-translation-by-jointly" - title: "Neural Machine Translation by Jointly Learning to Align and Translate" - arxiv_id: "1409.0473" - abstract: "Neural machine translation is a recently proposed approach to machine\ntranslation. Unlike the traditional statistical machine translation, the neural\nmachine translation aims at building a single neural network that can be\njointly tuned to maximize the translation performance. The models proposed\nrecently for neural machine translation often belong to a family of\nencoder-decoders and consists of an encoder that encodes a source sentence into\na fixed-length vector from which a decoder generates a translation. In this\npaper, we conjecture that the use of a fixed-length vector is a bottleneck in\nimproving the performance of this basic encoder-decoder architecture, and\npropose to extend this by allowing a model to automatically (soft-)search for\nparts of a source sentence that are relevant to predicting a target word,\nwithout having to form these parts as a hard segment explicitly. With this new\napproach, we achieve a translation performance comparable to the existing\nstate-of-the-art phrase-based system on the task of English-to-French\ntranslation. Furthermore, qualitative analysis reveals that the\n(soft-)alignments found by the model agree well with our intuition." - pub_date: { - seconds: 1409529600 - } - authors: "Dzmitry Bahdanau" - authors: "Kyunghyun Cho" - authors: "Yoshua Bengio" - repositories: { - url: "https://github.com/dl4nlp-tuda2021/deep-learning-for-nlp-lectures" - framework: FRAMEWORK_PYTORCH - number_of_stars: 60 - description: "Deep Learning for Natural Language Processing - Lectures 2021" - } - repositories: { - url: "https://github.com/prakhargurawa/Neural-Machine-Translation-Keras-Attention" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Machine translation using LSTM Model. Created two translation models with/without attention mechanisms for translation between French-English and German-English." - } - repositories: { - url: "https://github.com/AMNAALMGLY/NLP" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/prakhargurawa/Neural-Machine-Translation-Keras-German-English" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Machine translation using LSTM Model. Created two translation models with/without attention mechanisms for translation between French-English and German-English." 
- } - repositories: { - url: "https://github.com/millenialSpirou/ift6010" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/yinghao1019/NLP_and_DL_practice" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "This repository is used for NLP Model practice and learning" - } - repositories: { - url: "https://github.com/tree-park/kor-to-eng-translation" - framework: FRAMEWORK_PYTORCH - description: "Translator by transforemer and seq2seq (with attention mechanism) - Pytorch" - } - repositories: { - url: "https://github.com/hiun/learning-transformers" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Transformers Tutorials with Open Source Implementations" - } - repositories: { - url: "https://github.com/chenqianqianxiaoxiannv/seq2seq" - framework: FRAMEWORK_TENSORFLOW - description: "seq2seq" - } - repositories: { - url: "https://github.com/xhlulu/arxiv-assistant" - framework: FRAMEWORK_OTHERS - description: "A simple webapp for helping you navigate Arxiv.org" - } - methods: { - name: "Tanh Activation" - full_name: "Tanh Activation" - description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" - } - methods: { - name: "Additive Attention" - full_name: "Additive Attention" - description: "**Additive Attention**, also known as **Bahdanau Attention**, uses a one-hidden layer feed-forward network to calculate the attention alignment score:\r\n\r\n$$f_{att}\\left(\\textbf{h}_{i}, \\textbf{s}\\_{j}\\right) = v\\_{a}^{T}\\tanh\\left(\\textbf{W}\\_{a}\\left[\\textbf{h}\\_{i};\\textbf{s}\\_{j}\\right]\\right)$$\r\n\r\nwhere $\\textbf{v}\\_{a}$ and $\\textbf{W}\\_{a}$ are learned attention parameters. Here $\\textbf{h}$ refers to the hidden states for the encoder, and $\\textbf{s}$ is the hidden states for the decoder. The function above is thus a type of alignment score function. We can use a matrix of alignment scores to show the correlation between source and target words, as the Figure to the right shows.\r\n\r\nWithin a neural network, once we have the alignment scores, we calculate the final scores using a softmax function of these alignment scores (ensuring it sums to 1)." - } - } - video: { - video_id: "upskBSbA9cA" - video_title: "PR-055: Neural Machine Translation by Jointly Learning to Align and Translate" - number_of_likes: 27 - number_of_views: 2831 - published_date: { - seconds: 1513516897 - } - uploader: "Jiyang Kang" - } - } -} -pr_id_to_video: { - key: 56 - value: { - papers: { - paper_id: "dynamic-routing-between-capsules" - title: "Dynamic Routing Between Capsules" - arxiv_id: "1710.09829" - abstract: "A capsule is a group of neurons whose activity vector represents the\ninstantiation parameters of a specific type of entity such as an object or an\nobject part. 
We use the length of the activity vector to represent the\nprobability that the entity exists and its orientation to represent the\ninstantiation parameters. Active capsules at one level make predictions, via\ntransformation matrices, for the instantiation parameters of higher-level\ncapsules. When multiple predictions agree, a higher level capsule becomes\nactive. We show that a discriminatively trained, multi-layer capsule system\nachieves state-of-the-art performance on MNIST and is considerably better than\na convolutional net at recognizing highly overlapping digits. To achieve these\nresults we use an iterative routing-by-agreement mechanism: A lower-level\ncapsule prefers to send its output to higher level capsules whose activity\nvectors have a big scalar product with the prediction coming from the\nlower-level capsule." - pub_date: { - seconds: 1508976000 - } - authors: "Sara Sabour" - authors: "Nicholas Frosst" - authors: "Geoffrey E Hinton" - repositories: { - url: "https://github.com/Egesabanci/capsuleNetworks" - framework: FRAMEWORK_TENSORFLOW - description: ":pill: CapsNets implementation according to the paper: Dynamic Routing Between Capsules - Sara Sabour, Nicholas Frosst, Geoffrey E Hinton" - } - repositories: { - url: "https://github.com/ecstayalive/Degenerate-capsule-neural-network" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "退化胶囊神经网络是通过改变极少的胶囊神经网络结构并将其应用与一些对位置要求不敏感的一些识别场合,但是保留了其快速泛化的特性" - } - repositories: { - url: "https://github.com/razvanalex/CapsLayer" - framework: FRAMEWORK_TENSORFLOW - description: "CapsLayer: An advanced library for capsule theory" - } - repositories: { - url: "https://github.com/naturomics/CapsLayer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 352 - description: "CapsLayer: An advanced library for capsule theory" - } - repositories: { - url: "https://github.com/lab-ml/nn/tree/master/labml_nn/capsule_networks" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3053 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." - } - repositories: { - url: "https://github.com/jelifysh/Capsule-Networks" - framework: FRAMEWORK_PYTORCH - number_of_stars: 12 - description: "Pytorch Implementation of Capsule Networks" - } - repositories: { - url: "https://github.com/EscVM/Efficient-CapsNet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 125 - description: "Official TensorFlow code for the forthcoming paper \"Efficient-CapsNet: Capsule Network with Self-Attention Routing\"." - } - repositories: { - url: "https://github.com/OwenLeng/pytorch-capsule" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/noureldinalaa/Capsule-Networks" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "Simply explain and implement Capsule Networks on MNIST dataset using Pytorch." 
- } - repositories: { - url: "https://github.com/dedhiaparth98/capsule-network" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "A TensorFlow implementation of Capsule Network as described in the paper Dynamic Routing Between Capsules" - } - } - video: { - video_id: "_YT_8CT2w_Q" - video_title: "PR-056: Capsule Network" - number_of_likes: 67 - number_of_views: 5738 - published_date: { - seconds: 1513522378 - } - uploader: "Jaejun Yoo" - } - } -} -pr_id_to_video: { - key: 57 - value: { - papers: { - paper_id: "mask-r-cnn" - title: "Mask R-CNN" - arxiv_id: "1703.06870" - abstract: "We present a conceptually simple, flexible, and general framework for object\ninstance segmentation. Our approach efficiently detects objects in an image\nwhile simultaneously generating a high-quality segmentation mask for each\ninstance. The method, called Mask R-CNN, extends Faster R-CNN by adding a\nbranch for predicting an object mask in parallel with the existing branch for\nbounding box recognition. Mask R-CNN is simple to train and adds only a small\noverhead to Faster R-CNN, running at 5 fps. Moreover, Mask R-CNN is easy to\ngeneralize to other tasks, e.g., allowing us to estimate human poses in the\nsame framework. We show top results in all three tracks of the COCO suite of\nchallenges, including instance segmentation, bounding-box object detection, and\nperson keypoint detection. Without bells and whistles, Mask R-CNN outperforms\nall existing, single-model entries on every task, including the COCO 2016\nchallenge winners. We hope our simple and effective approach will serve as a\nsolid baseline and help ease future research in instance-level recognition.\nCode has been made available at: https://github.com/facebookresearch/Detectron" - pub_date: { - seconds: 1489968000 - } - authors: "Kaiming He" - authors: "Georgia Gkioxari" - authors: "Piotr Dollár" - authors: "Ross Girshick" - repositories: { - url: "https://github.com/SonginCV/GMPHD_MAF" - framework: FRAMEWORK_OTHERS - number_of_stars: 10 - description: "The official implementation of the GMPHD_MAF Tracker" - } - repositories: { - url: "https://github.com/miaohua1982/simple_fasterrcnn_pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/tuguldurs/vivus" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "venous intravascular ultrasound image processing" - } - repositories: { - url: "https://github.com/SonginCV/GMPHD_SAF" - framework: FRAMEWORK_OTHERS - number_of_stars: 10 - description: "The official implementation of the GMPHD_MAF Tracker" - } - repositories: { - url: "https://github.com/alexalm4190/Mask_RCNN-Vizzy_Hand" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - repositories: { - url: "https://github.com/deolipankaj/Stone_Detection_MRCNN" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Stone detection in an off-road environment with Mask R-CNN" - } - repositories: { - url: "https://github.com/EmGarr/kerod" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 34 - description: "DETR - Faster RCNN implementation in tensorflow 2" - } - repositories: { - url: "https://github.com/polospeter/TensorFlow-Advanced-Techniques-Specialization" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - } - repositories: { - is_official: true - url: "https://github.com/facebookresearch/detectron2" - framework: FRAMEWORK_PYTORCH - 
number_of_stars: 16936 - description: "Detectron2 is FAIR's next-generation platform for object detection, segmentation and other visual recognition tasks." - } - repositories: { - url: "https://github.com/raymon-tian/hourglass-facekeypoints-detection" - framework: FRAMEWORK_PYTORCH - number_of_stars: 214 - description: "face keypoints deteciton based on stackedhourglass" - } - methods: { - name: "Mask R-CNN" - full_name: "Mask R-CNN" - description: "**Mask R-CNN** extends [Faster R-CNN](http://paperswithcode.com/method/faster-r-cnn) to solve instance segmentation tasks. It achieves this by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. In principle, Mask R-CNN is an intuitive extension of Faster R-CNN, but constructing the mask branch properly is critical for good results. \r\n\r\nMost importantly, Faster R-CNN was not designed for pixel-to-pixel alignment between network inputs and outputs. This is evident in how [RoIPool](http://paperswithcode.com/method/roi-pooling), the *de facto* core operation for attending to instances, performs coarse spatial quantization for feature extraction. To fix the misalignment, Mask R-CNN utilises a simple, quantization-free layer, called [RoIAlign](http://paperswithcode.com/method/roi-align), that faithfully preserves exact spatial locations. \r\n\r\nSecondly, Mask R-CNN *decouples* mask and class prediction: it predicts a binary mask for each class independently, without competition among classes, and relies on the network's RoI classification branch to predict the category. In contrast, an [FCN](http://paperswithcode.com/method/fcn) usually perform per-pixel multi-class categorization, which couples segmentation and classification." - } - methods: { - name: "RoIAlign" - full_name: "RoIAlign" - description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." - } - methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. 
Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." - } - methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." - } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ResNeXt Block" - full_name: "ResNeXt Block" - description: "A **ResNeXt Block** is a type of residual block used as part of the [ResNeXt](https://paperswithcode.com/method/resnext) CNN architecture. It uses a \"split-transform-merge\" strategy (branched paths within a single module) similar to an [Inception module](https://paperswithcode.com/method/inception-module), i.e. it aggregates a set of transformations. Compared to a Residual Block, it exposes a new dimension, *cardinality* (size of set of transformations) $C$, as an essential factor in addition to depth and width. \r\n\r\nFormally, a set of aggregated transformations can be represented as: $\\mathcal{F}(x)=\\sum_{i=1}^{C}\\mathcal{T}_i(x)$, where $\\mathcal{T}_i(x)$ can be an arbitrary function. Analogous to a simple neuron, $\\mathcal{T}_i$ should project $x$ into an (optionally low-dimensional) embedding and then transform it." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. 
It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Grouped Convolution" - full_name: "Grouped Convolution" - description: "A **Grouped Convolution** uses a group of convolutions - multiple kernels per layer - resulting in multiple channel outputs per layer. This leads to wider networks helping a network learn a varied set of low level and high level features. The original motivation of using Grouped Convolutions in [AlexNet](https://paperswithcode.com/method/alexnet) was to distribute the model over multiple GPUs as an engineering compromise. But later, with models such as [ResNeXt](https://paperswithcode.com/method/alexnet), it was shown this module could be used to improve classification accuracy. Specifically by exposing a new dimension through grouped convolutions, *cardinality* (the size of set of transformations), we can increase accuracy by increasing it." - } - } - video: { - video_id: "RtSZALC9DlU" - video_title: "PR-057: Mask R-CNN" - number_of_likes: 133 - number_of_views: 10986 - published_date: { - seconds: 1515330928 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 58 - value: { - papers: { - paper_id: "the-consciousness-prior" - title: "The Consciousness Prior" - arxiv_id: "1709.08568" - abstract: "A new prior is proposed for learning representations of high-level concepts of the kind we manipulate with language. This prior can be combined with other priors in order to help disentangling abstract factors from each other. It is inspired by cognitive neuroscience theories of consciousness, seen as a bottleneck through which just a few elements, after having been selected by attention from a broader pool, are then broadcast and condition further processing, both in perception and decision-making. The set of recently selected elements one becomes aware of is seen as forming a low-dimensional conscious state. This conscious state is combining the few concepts constituting a conscious thought, i.e., what one is immediately conscious of at a particular moment. We claim that this architectural and information-processing constraint corresponds to assumptions about the joint distribution between high-level concepts. 
To the extent that these assumptions are generally true (and the form of natural language seems consistent with them), they can form a useful prior for representation learning. A low-dimensional thought or conscious state is analogous to a sentence: it involves only a few variables and yet can make a statement with very high probability of being true. This is consistent with a joint distribution (over high-level concepts) which has the form of a sparse factor graph, i.e., where the dependencies captured by each factor of the factor graph involve only very few variables while creating a strong dip in the overall energy function. The consciousness prior also makes it natural to map conscious states to natural language utterances or to express classical AI knowledge in a form similar to facts and rules, albeit capturing uncertainty as well as efficient search mechanisms implemented by attention mechanisms." - pub_date: { - seconds: 1506297600 - } - authors: "Yoshua Bengio" - } - video: { - video_id: "7fIAdhl0KYc" - video_title: "PR-058: The Consciousness Prior" - number_of_views: 1151 - published_date: { - seconds: 1515333966 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 59 - value: { - papers: { - paper_id: "style-transfer-from-non-parallel-text-by" - title: "Style Transfer from Non-Parallel Text by Cross-Alignment" - arxiv_id: "1705.09655" - abstract: "This paper focuses on style transfer on the basis of non-parallel text. This\nis an instance of a broad family of problems including machine translation,\ndecipherment, and sentiment modification. The key challenge is to separate the\ncontent from other aspects such as style. We assume a shared latent content\ndistribution across different text corpora, and propose a method that leverages\nrefined alignment of latent representations to perform style transfer. The\ntransferred sentences from one style should match example sentences from the\nother style as a population. We demonstrate the effectiveness of this\ncross-alignment method on three tasks: sentiment modification, decipherment of\nword substitution ciphers, and recovery of word order." - pub_date: { - seconds: 1495756800 - } - authors: "Tianxiao Shen" - authors: "Tao Lei" - authors: "Regina Barzilay" - authors: "Tommi Jaakkola" - repositories: { - url: "https://github.com/jpark621/language-style-transfer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 16 - description: "Reimplementation of NLP Style Transfer from Non-parallel Text with Adversarial Alignment (https://arxiv.org/abs/1705.09655)" - } - repositories: { - url: "https://github.com/jishavm/TextStyleTransfer" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/kyuer/language-style-transfer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "\"Style Transfer from Non-Parallel Text by Cross-Alignment\". Tianxiao Shen, Tao Lei, Regina Barzilay, and Tommi Jaakkola. NIPS 2017." 
- } - repositories: { - url: "https://github.com/kaletap/language-style-transfer-pytorch" - framework: FRAMEWORK_TENSORFLOW - description: "Experiments to rewrite style transfer code from tensorflow to pytorch (not finished yet)" - } - repositories: { - url: "https://github.com/qfzhu/st" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - is_official: true - url: "https://github.com/shentianxiao/language-style-transfer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 494 - } - repositories: { - url: "https://github.com/mariob6/style_text" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - } - repositories: { - url: "https://github.com/sy-sunmoon/Clever-Commenter-Let-s-Try-More-Apps" - framework: FRAMEWORK_PYTORCH - description: "This project contrains of the Clever Commenter: Let's Try More Apps project in Google AI ML Winter Camp. by 赶论文ing" - } - repositories: { - url: "https://github.com/WhiskyChoy/language-style-transfer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Forked from https://github.com/shentianxiao/language-style-transfer" - } - repositories: { - url: "https://github.com/nlahlaf/Text-Style-Transfer" - framework: FRAMEWORK_TENSORFLOW - description: "Final Project for Deep Learning on Text Style Transfer" - } - } - video: { - video_id: "w-P2V2LlrHg" - video_title: "PR-059: Style Transfer from Non-Parallel Text by Cross-Alignment" - number_of_likes: 12 - number_of_views: 1050 - published_date: { - seconds: 1515977170 - } - uploader: "Suh Kiho" - } - } -} -pr_id_to_video: { - key: 60 - value: { - papers: { - paper_id: "deep-learning-based-recommender-system-a" - title: "Deep Learning based Recommender System: A Survey and New Perspectives" - arxiv_id: "1707.07435" - abstract: "With the ever-growing volume of online information, recommender systems have been an effective strategy to overcome such information overload. The utility of recommender systems cannot be overstated, given its widespread adoption in many web applications, along with its potential impact to ameliorate many problems related to over-choice. In recent years, deep learning has garnered considerable interest in many research fields such as computer vision and natural language processing, owing not only to stellar performance but also the attractive property of learning feature representations from scratch. The influence of deep learning is also pervasive, recently demonstrating its effectiveness when applied to information retrieval and recommender systems research. Evidently, the field of deep learning in recommender system is flourishing. This article aims to provide a comprehensive review of recent research efforts on deep learning based recommender systems. More concretely, we provide and devise a taxonomy of deep learning based recommendation models, along with providing a comprehensive summary of the state-of-the-art. Finally, we expand on current trends and provide new perspectives pertaining to this new exciting development of the field." 
- pub_date: { - seconds: 1500854400 - } - authors: "Shuai Zhang" - authors: "Lina Yao" - authors: "Aixin Sun" - authors: "Yi Tay" - repositories: { - url: "https://github.com/YichenLin/MATH-80600A-Project" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/abmitra84/recommender_system" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - repositories: { - url: "https://github.com/anuragreddygv323/Important-stuff" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - repositories: { - url: "https://github.com/ginobaltazar7/DS" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Data Science, Deep Learning, Databases" - } - repositories: { - url: "https://github.com/DreamingRaven/Nemesyst" - framework: FRAMEWORK_OTHERS - number_of_stars: 13 - description: "Generalised and highly customisable, hybrid-parallelism, database based, deep learning framework." - } - repositories: { - url: "https://github.com/ginobaltazar7/Data-Science" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Data Science, Deep Learning, Databases" - } - repositories: { - url: "https://github.com/sunhwan-lee/recommender_system" - framework: FRAMEWORK_TENSORFLOW - description: "Collection of codes and papers in the topic of recommender system" - } - } - video: { - video_id: "V6zixdCIOqw" - video_title: "PR-060: Deep Neural Networks for YouTube Recommendations" - number_of_likes: 49 - number_of_views: 4013 - published_date: { - seconds: 1516540254 - } - uploader: "keun bong Kwak" - } - } -} -pr_id_to_video: { - key: 61 - value: { - papers: { - paper_id: "understanding-deep-learning-requires" - title: "Understanding deep learning requires rethinking generalization" - arxiv_id: "1611.03530" - abstract: "Despite their massive size, successful deep artificial neural networks can\nexhibit a remarkably small difference between training and test performance.\nConventional wisdom attributes small generalization error either to properties\nof the model family, or to the regularization techniques used during training.\n Through extensive systematic experiments, we show how these traditional\napproaches fail to explain why large neural networks generalize well in\npractice. Specifically, our experiments establish that state-of-the-art\nconvolutional networks for image classification trained with stochastic\ngradient methods easily fit a random labeling of the training data. This\nphenomenon is qualitatively unaffected by explicit regularization, and occurs\neven if we replace the true images by completely unstructured random noise. We\ncorroborate these experimental findings with a theoretical construction showing\nthat simple depth two neural networks already have perfect finite sample\nexpressivity as soon as the number of parameters exceeds the number of data\npoints as it usually does in practice.\n We interpret our experimental findings by comparison with traditional models." 
- pub_date: { - seconds: 1478736000 - } - authors: "Chiyuan Zhang" - authors: "Samy Bengio" - authors: "Moritz Hardt" - authors: "Benjamin Recht" - authors: "Oriol Vinyals" - repositories: { - url: "https://github.com/randyshee/TensorFlow-Projects" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/iwzy7071/graph_neural_network" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/2xic/notebooks" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "machine learning and computer vision is just algorithms and data structures with more fancy properties. be real about the hype" - } - repositories: { - url: "https://github.com/pluskid/fitting-random-labels" - framework: FRAMEWORK_PYTORCH - number_of_stars: 144 - description: "Example code for the paper \"Understanding deep learning requires rethinking generalization\"" - } - repositories: { - url: "https://github.com/aaronpeikert/methods-meetup" - framework: FRAMEWORK_OTHERS - number_of_stars: 10 - description: "Meeting of some friends to discuss methods, philosophy, stats, psychology and surrounding topics." - } - repositories: { - url: "https://github.com/glouppe/info8010-deep-learning" - framework: FRAMEWORK_PYTORCH - number_of_stars: 665 - description: "Lectures for INFO8010 - Deep Learning, ULiège" - } - repositories: { - url: "https://github.com/jessemzhang/dl_spectral_normalization" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 - } - repositories: { - url: "https://github.com/KellyHwong/rethinking_generalization" - framework: FRAMEWORK_TENSORFLOW - description: "UNDERSTANDING DEEP LEARNING REQUIRES RETHINKING GENERALIZATION" - } - } - video: { - video_id: "UxJNG7ENRNg" - video_title: "PR-061: Understanding Deep Learning Requires Rethinking Generalization" - number_of_likes: 49 - number_of_views: 3236 - published_date: { - seconds: 1516543607 - } - uploader: "Jaejun Yoo" - } - } -} -pr_id_to_video: { - key: 62 - value: { - papers: { - paper_id: "deep-learning-a-critical-appraisal" - title: "Deep Learning: A Critical Appraisal" - arxiv_id: "1801.00631" - abstract: "Although deep learning has historical roots going back decades, neither the\nterm \"deep learning\" nor the approach was popular just over five years ago,\nwhen the field was reignited by papers such as Krizhevsky, Sutskever and\nHinton's now classic (2012) deep network model of Imagenet. What has the field\ndiscovered in the five subsequent years? Against a background of considerable\nprogress in areas such as speech recognition, image recognition, and game\nplaying, and considerable enthusiasm in the popular press, I present ten\nconcerns for deep learning, and suggest that deep learning must be supplemented\nby other techniques if we are to reach artificial general intelligence." 
- pub_date: { - seconds: 1514851200 - } - authors: "Gary Marcus" - repositories: { - url: "https://github.com/astoycos/Mini_Project2" - framework: FRAMEWORK_TENSORFLOW - } - } - video: { - video_id: "6hg5d10SZr0" - video_title: "PR-062: Deep Learning: A Critical Appraisal (2018)" - number_of_likes: 56 - number_of_views: 3615 - published_date: { - seconds: 1517147263 - } - uploader: "Terry TaeWoong Um" - } - } -} -pr_id_to_video: { - key: 63 - value: { - papers: { - paper_id: "peephole-predicting-network-performance" - title: "Peephole: Predicting Network Performance Before Training" - arxiv_id: "1712.03351" - abstract: "The quest for performant networks has been a significant force that drives\nthe advancements of deep learning in recent years. While rewarding, improving\nnetwork design has never been an easy journey. The large design space combined\nwith the tremendous cost required for network training poses a major obstacle\nto this endeavor. In this work, we propose a new approach to this problem,\nnamely, predicting the performance of a network before training, based on its\narchitecture. Specifically, we develop a unified way to encode individual\nlayers into vectors and bring them together to form an integrated description\nvia LSTM. Taking advantage of the recurrent network's strong expressive power,\nthis method can reliably predict the performances of various network\narchitectures. Our empirical studies showed that it not only achieved accurate\npredictions but also produced consistent rankings across datasets -- a key\ndesideratum in performance prediction." - pub_date: { - seconds: 1512777600 - } - authors: "Boyang Deng" - authors: "Junjie Yan" - authors: "Dahua Lin" - methods: { - name: "Tanh Activation" - full_name: "Tanh Activation" - description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" - } - methods: { - name: "LSTM" - full_name: "Long Short-Term Memory" - description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. 
Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" - } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - } - video: { - video_id: "ZO4bXgdcCQA" - video_title: "PR-063 Peephole: Predicting Network Performance Before Training" - number_of_likes: 5 - number_of_views: 778 - published_date: { - seconds: 1517147277 - } - uploader: "Taegyun Jeon" - } - } -} -pr_id_to_video: { - key: 64 - value: { - papers: { - paper_id: "wide-deep-learning-for-recommender-systems" - title: "Wide & Deep Learning for Recommender Systems" - arxiv_id: "1606.07792" - abstract: "Generalized linear models with nonlinear feature transformations are widely\nused for large-scale regression and classification problems with sparse inputs.\nMemorization of feature interactions through a wide set of cross-product\nfeature transformations are effective and interpretable, while generalization\nrequires more feature engineering effort. With less feature engineering, deep\nneural networks can generalize better to unseen feature combinations through\nlow-dimensional dense embeddings learned for the sparse features. However, deep\nneural networks with embeddings can over-generalize and recommend less relevant\nitems when the user-item interactions are sparse and high-rank. In this paper,\nwe present Wide & Deep learning---jointly trained wide linear models and deep\nneural networks---to combine the benefits of memorization and generalization\nfor recommender systems. We productionized and evaluated the system on Google\nPlay, a commercial mobile app store with over one billion active users and over\none million apps. Online experiment results show that Wide & Deep significantly\nincreased app acquisitions compared with wide-only and deep-only models. We\nhave also open-sourced our implementation in TensorFlow." - pub_date: { - seconds: 1466726400 - } - authors: "Heng-Tze Cheng" - authors: "Levent Koc" - authors: "Jeremiah Harmsen" - authors: "Tal Shaked" - authors: "Tushar Chandra" - authors: "Hrishi Aradhye" - authors: "Glen Anderson" - authors: "Greg Corrado" - authors: "Wei Chai" - authors: "Mustafa Ispir" - authors: "Rohan Anil" - authors: "Zakaria Haque" - authors: "Lichan Hong" - authors: "Vihan Jain" - authors: "Xiaobing Liu" - authors: "Hemal Shah" - repositories: { - url: "https://github.com/shenweichen/DeepCTR" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5043 - description: "Easy-to-use,Modular and Extendible package of deep-learning based CTR models ." 
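The Tanh, LSTM and Sigmoid entries in the PR-063 record above describe a gated recurrence. Below is a minimal Go sketch of a single LSTM step for scalar inputs, written only to make the gate structure concrete; lstmParams, lstmStep and the scalar weights are illustrative names rather than part of any framework, and a real cell would use matrix-vector products.

package main

import (
	"fmt"
	"math"
)

func sigmoid(x float64) float64 { return 1 / (1 + math.Exp(-x)) }

// lstmParams holds scalar weights for the four gates of one cell.
type lstmParams struct {
	wf, uf, bf float64 // forget gate
	wi, ui, bi float64 // input gate
	wo, uo, bo float64 // output gate
	wc, uc, bc float64 // candidate cell state
}

// lstmStep runs one cell update: sigmoid gates decide what to forget,
// write and expose, and the cell state is updated additively, which is
// what lets gradients flow without vanishing as quickly.
func lstmStep(p lstmParams, x, hPrev, cPrev float64) (h, c float64) {
	f := sigmoid(p.wf*x + p.uf*hPrev + p.bf)
	i := sigmoid(p.wi*x + p.ui*hPrev + p.bi)
	o := sigmoid(p.wo*x + p.uo*hPrev + p.bo)
	cTilde := math.Tanh(p.wc*x + p.uc*hPrev + p.bc)
	c = f*cPrev + i*cTilde
	h = o * math.Tanh(c)
	return h, c
}

func main() {
	p := lstmParams{wf: 0.5, uf: 0.1, wi: 0.4, ui: 0.2, wo: 0.3, uo: 0.1, wc: 0.6, uc: 0.2}
	h, c := lstmStep(p, 1.0, 0.0, 0.0)
	fmt.Printf("h=%.4f c=%.4f\n", h, c)
}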
- } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleRec/tree/release/2.1.0/models/rank/wide_deep" - framework: FRAMEWORK_OTHERS - number_of_stars: 527 - description: "大规模推荐模型训练工具" - } - repositories: { - url: "https://github.com/fengtong-xiao/DMBGN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "The implementation of the submitted paper \"Deep Multi-Behaviors Graph Network for Voucher Redemption Rate Prediction\" in SIGKDD 2021 Applied Data Science Track." - } - repositories: { - url: "https://github.com/aivolcano/RecSys_tf2" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/NVIDIA/HugeCTR" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 432 - description: "HugeCTR is a high efficiency GPU framework designed for Click-Through-Rate (CTR) estimating training" - } - repositories: { - url: "https://github.com/jsleroux/Recommender-Systems" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/codlife/NLP" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/vinaymittal22/Income_Prediction_US" - framework: FRAMEWORK_OTHERS - description: "Adult data set solve for predict income of US population" - } - repositories: { - url: "https://github.com/yil479/yelp_review" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/sandeepnair2812/Deep-Learning-Based-Search-and-Recommendation-System" - framework: FRAMEWORK_TENSORFLOW - } - } - video: { - video_id: "hKoJPqWLrI4" - video_title: "PR-064: Wide&Deep Learning for Recommender Systems" - number_of_likes: 31 - number_of_views: 2140 - published_date: { - seconds: 1517749978 - } - uploader: "keun bong Kwak" - } - } -} -pr_id_to_video: { - key: 65 - value: { - papers: { - paper_id: "high-resolution-image-synthesis-and-semantic" - title: "High-Resolution Image Synthesis and Semantic Manipulation with Conditional GANs" - arxiv_id: "1711.11585" - abstract: "We present a new method for synthesizing high-resolution photo-realistic\nimages from semantic label maps using conditional generative adversarial\nnetworks (conditional GANs). Conditional GANs have enabled a variety of\napplications, but the results are often limited to low-resolution and still far\nfrom realistic. In this work, we generate 2048x1024 visually appealing results\nwith a novel adversarial loss, as well as new multi-scale generator and\ndiscriminator architectures. Furthermore, we extend our framework to\ninteractive visual manipulation with two additional features. First, we\nincorporate object instance segmentation information, which enables object\nmanipulations such as removing/adding objects and changing the object category.\nSecond, we propose a method to generate diverse results given the same input,\nallowing users to edit the object appearance interactively. Human opinion\nstudies demonstrate that our method significantly outperforms existing methods,\nadvancing both the quality and the resolution of deep image synthesis and\nediting." 
- pub_date: { - seconds: 1512000000 - } - authors: "Ting-Chun Wang" - authors: "Ming-Yu Liu" - authors: "Jun-Yan Zhu" - authors: "Andrew Tao" - authors: "Jan Kautz" - authors: "Bryan Catanzaro" - repositories: { - url: "https://github.com/JeongHyunJin/Pix2PixHD" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - } - repositories: { - url: "https://github.com/ubc-vision/DwNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 19 - } - repositories: { - url: "https://github.com/haru-256/pix2pixHD.pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/SeniorDev009/ONNX-project" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - } - repositories: { - is_official: true - url: "https://github.com/NVIDIA/pix2pixHD" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5315 - description: "Synthesizing and manipulating 2048x1024 images with conditional GANs" - } - repositories: { - url: "https://github.com/rickyHong/pix2pixHD-repl" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/edricwu/Testing-1" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/mingyuliutw/UNIT" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1757 - description: "Unsupervised Image-to-Image Translation" - } - repositories: { - url: "https://github.com/wentao99/pix2pixHD" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/LiuNull/pix2pix_Liu" - framework: FRAMEWORK_PYTORCH - } - } - video: { - video_id: "_5ofbwltEKU" - video_title: "PR-065 : High-Resolution Image Synthesis and Semantic Manipulation with Conditional GANs" - number_of_likes: 16 - number_of_views: 1891 - published_date: { - seconds: 1517753318 - } - uploader: "이광희" - } - } -} -pr_id_to_video: { - key: 66 - value: { - papers: { - paper_id: "dont-decay-the-learning-rate-increase-the" - title: "Don't Decay the Learning Rate, Increase the Batch Size" - arxiv_id: "1711.00489" - abstract: "It is common practice to decay the learning rate. Here we show one can\nusually obtain the same learning curve on both training and test sets by\ninstead increasing the batch size during training. This procedure is successful\nfor stochastic gradient descent (SGD), SGD with momentum, Nesterov momentum,\nand Adam. It reaches equivalent test accuracies after the same number of\ntraining epochs, but with fewer parameter updates, leading to greater\nparallelism and shorter training times. We can further reduce the number of\nparameter updates by increasing the learning rate $\\epsilon$ and scaling the\nbatch size $B \\propto \\epsilon$. Finally, one can increase the momentum\ncoefficient $m$ and scale $B \\propto 1/(1-m)$, although this tends to slightly\nreduce the test accuracy. Crucially, our techniques allow us to repurpose\nexisting training schedules for large batch training with no hyper-parameter\ntuning. We train ResNet-50 on ImageNet to $76.1\\%$ validation accuracy in under\n30 minutes." - pub_date: { - seconds: 1509494400 - } - authors: "Samuel L. Smith" - authors: "Pieter-Jan Kindermans" - authors: "Chris Ying" - authors: "Quoc V. 
Le" - repositories: { - url: "https://github.com/rbkim1990/capstone-age-estimation" - framework: FRAMEWORK_OTHERS - } - methods: { - name: "SGD" - full_name: "Stochastic Gradient Descent" - description: "**Stochastic Gradient Descent** is an iterative optimization technique that uses minibatches of data to form an expectation of the gradient, rather than the full gradient using all available data. That is for weights $w$ and a loss function $L$ we have:\r\n\r\n$$ w\\_{t+1} = w\\_{t} - \\eta\\hat{\\nabla}\\_{w}{L(w\\_{t})} $$\r\n\r\nWhere $\\eta$ is a learning rate. SGD reduces redundancy compared to batch gradient descent - which recomputes gradients for similar examples before each parameter update - so it is usually much faster.\r\n\r\n(Image Source: [here](http://rasbt.github.io/mlxtend/user_guide/general_concepts/gradient-optimization/))" - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - } - video: { - video_id: "jFpO-E4RPhQ" - video_title: "PR-066: Don't decay the learning rate, increase the batch size" - number_of_likes: 19 - number_of_views: 2360 - published_date: { - seconds: 1518357854 - } - uploader: "차준범" - } - } -} -pr_id_to_video: { - key: 67 - value: { - papers: { - paper_id: "audio-super-resolution-using-neural-networks" - title: "Audio Super Resolution using Neural Networks" - arxiv_id: "1708.00853" - abstract: "We introduce a new audio processing technique that increases the sampling\nrate of signals such as speech or music using deep convolutional neural\nnetworks. Our model is trained on pairs of low and high-quality audio examples;\nat test-time, it predicts missing samples within a low-resolution signal in an\ninterpolation process similar to image super-resolution. Our method is simple\nand does not involve specialized audio processing techniques; in our\nexperiments, it outperforms baselines on standard speech and music benchmarks\nat upscaling ratios of 2x, 4x, and 6x. The method has practical applications in\ntelephony, compression, and text-to-speech generation; it demonstrates the\neffectiveness of feed-forward convolutional architectures on an audio\ngeneration task." - pub_date: { - seconds: 1501632000 - } - authors: "Volodymyr Kuleshov" - authors: "S. 
Zayd Enam" - authors: "Stefano Ermon" - repositories: { - url: "https://github.com/johnathanchiu/audio-upsampling" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Uses CNN to upsample low-res audio files" - } - repositories: { - url: "https://github.com/kuleshov/audio-super-res" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 545 - description: "Audio super resolution using neural networks" - } - repositories: { - url: "https://github.com/Amuzak-NTL/ASR-for-Speech-Recog" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - repositories: { - url: "https://github.com/TrizteX/Audio-SuperRes" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Audio Super-Resolution performed on VCTK corpus" - } - } - video: { - video_id: "iqN08EPMjSs" - video_title: "PR-067: Audio Super Resolution using Neural Nets" - number_of_likes: 21 - number_of_views: 3366 - published_date: { - seconds: 1518357824 - } - uploader: "Seungil Kim" - } - } -} -pr_id_to_video: { - key: 68 - value: { - papers: { - paper_id: "deepar-probabilistic-forecasting-with" - title: "DeepAR: Probabilistic Forecasting with Autoregressive Recurrent Networks" - arxiv_id: "1704.04110" - abstract: "Probabilistic forecasting, i.e. estimating the probability distribution of a\ntime series' future given its past, is a key enabler for optimizing business\nprocesses. In retail businesses, for example, forecasting demand is crucial for\nhaving the right inventory available at the right time at the right place. In\nthis paper we propose DeepAR, a methodology for producing accurate\nprobabilistic forecasts, based on training an auto regressive recurrent network\nmodel on a large number of related time series. We demonstrate how by applying\ndeep learning techniques to forecasting, one can overcome many of the\nchallenges faced by widely-used classical approaches to the problem. We show\nthrough extensive empirical evaluation on several real-world forecasting data\nsets accuracy improvements of around 15% compared to state-of-the-art methods." - pub_date: { - seconds: 1492041600 - } - authors: "David Salinas" - authors: "Valentin Flunkert" - authors: "Jan Gasthaus" - repositories: { - url: "https://github.com/kshmawj111/solar_energy_forecast" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/jdb78/pytorch-forecasting" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1132 - description: "Time series forecasting with PyTorch" - } - repositories: { - url: "https://github.com/bingblackbean/water_supply_network_pressure_pred_deepar" - framework: FRAMEWORK_OTHERS - number_of_stars: 8 - description: "use deepar to predict water supply network pressure " - } - repositories: { - url: "https://github.com/ensembles4612/product_demand_forecast_using_DeepAR_Amazon_SageMaker" - framework: FRAMEWORK_OTHERS - description: "I built a forecast tool using DeepAR (autoregressive RNN with LSTM cells) in Sagemaker that can predict the demand of hundreds of products simultaneously." 
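The SGD and Adam entries in the PR-066 record above both come down to a short per-parameter update rule. The sketch below applies one step of each in plain Go, using the default hyper-parameters quoted in the Adam description (learning rate around 1e-3, beta1 0.9, beta2 0.999, eps 1e-8); sgdStep, adamStep and adamState are made-up names for this illustration.

package main

import (
	"fmt"
	"math"
)

// sgdStep applies w <- w - lr * g element-wise.
func sgdStep(w, g []float64, lr float64) {
	for i := range w {
		w[i] -= lr * g[i]
	}
}

// adamState carries the running first and second moment estimates.
type adamState struct {
	m, v []float64
	t    int
}

// adamStep applies one Adam update with bias-corrected moment estimates.
func adamStep(s *adamState, w, g []float64, lr, beta1, beta2, eps float64) {
	s.t++
	for i := range w {
		s.m[i] = beta1*s.m[i] + (1-beta1)*g[i]
		s.v[i] = beta2*s.v[i] + (1-beta2)*g[i]*g[i]
		mHat := s.m[i] / (1 - math.Pow(beta1, float64(s.t)))
		vHat := s.v[i] / (1 - math.Pow(beta2, float64(s.t)))
		w[i] -= lr * mHat / (math.Sqrt(vHat) + eps)
	}
}

func main() {
	w := []float64{1, 2}
	g := []float64{0.1, -0.2}
	sgdStep(w, g, 0.1)
	s := &adamState{m: make([]float64, len(w)), v: make([]float64, len(w))}
	adamStep(s, w, g, 1e-3, 0.9, 0.999, 1e-8)
	fmt.Println(w)
}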
- } - repositories: { - url: "https://github.com/skp2/Electricity-Load" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Predict Electricity load from historical time series" - } - repositories: { - url: "https://github.com/Yonder-OSS/D3M-Primitives" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/Timbasa/Sample_GluonTS" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/nuankw/Summer-Research-2018-Part-One" - framework: FRAMEWORK_PYTORCH - number_of_stars: 8 - description: "PART I DeepAR implementation based on paper: https://arxiv.org/pdf/1704.04110.pdf" - } - repositories: { - url: "https://github.com/husnejahan/DeepAR-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 44 - } - repositories: { - url: "https://github.com/zhykoties/DeepAR" - framework: FRAMEWORK_PYTORCH - number_of_stars: 160 - description: "Implementation of deep learning models for time series in PyTorch." - } - } - video: { - video_id: "okyo61ZZivA" - video_title: "PR-068: DeepAR: Probabilistic Forecasting with Autoregressive Recurrent Networks" - number_of_likes: 23 - number_of_views: 3936 - published_date: { - seconds: 1519565309 - } - uploader: "Jiyang Kang" - } - } -} -pr_id_to_video: { - key: 69 - value: { - papers: { - paper_id: "efficient-neural-architecture-search-via-1" - title: "Efficient Neural Architecture Search via Parameter Sharing" - arxiv_id: "1802.03268" - abstract: "We propose Efficient Neural Architecture Search (ENAS), a fast and\ninexpensive approach for automatic model design. In ENAS, a controller learns\nto discover neural network architectures by searching for an optimal subgraph\nwithin a large computational graph. The controller is trained with policy\ngradient to select a subgraph that maximizes the expected reward on the\nvalidation set. Meanwhile the model corresponding to the selected subgraph is\ntrained to minimize a canonical cross entropy loss. Thanks to parameter sharing\nbetween child models, ENAS is fast: it delivers strong empirical performances\nusing much fewer GPU-hours than all existing automatic model design approaches,\nand notably, 1000x less expensive than standard Neural Architecture Search. On\nthe Penn Treebank dataset, ENAS discovers a novel architecture that achieves a\ntest perplexity of 55.8, establishing a new state-of-the-art among all methods\nwithout post-training processing. On the CIFAR-10 dataset, ENAS designs novel\narchitectures that achieve a test error of 2.89%, which is on par with NASNet\n(Zoph et al., 2018), whose test error is 2.65%." - pub_date: { - seconds: 1518134400 - } - authors: "Hieu Pham" - authors: "Melody Y. Guan" - authors: "Barret Zoph" - authors: "Quoc V. Le" - authors: "Jeff Dean" - repositories: { - url: "https://github.com/distrue/enas_tensorflow" - framework: FRAMEWORK_TENSORFLOW - description: "Implementation of Multi-Objective reward based on ENAS backbone" - } - repositories: { - url: "https://github.com/guoyongcs/NATv2" - framework: FRAMEWORK_PYTORCH - number_of_stars: 20 - description: "Implementation for NATv2." 
- } - repositories: { - url: "https://github.com/f51980280/ENAS-Implement" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "DeepLearning Systems and Inference Realization" - } - repositories: { - url: "https://github.com/nikitati/Nas.jl" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Programmable Neural Architecture Search" - } - repositories: { - url: "https://github.com/invisibleForce/ENAS-Pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "a pytorch implementation of ENAS " - } - repositories: { - url: "https://github.com/senthilva/Keras_functional_API_CNN" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/WillButAgain/ENAS" - framework: FRAMEWORK_PYTORCH - description: "scratch implementation of \"Efficient Neural Architecture Search via Parameter Sharing\" https://arxiv.org/pdf/1802.03268.pdf" - } - repositories: { - url: "https://github.com/melodyguan/enas" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1514 - description: "TensorFlow Code for paper \"Efficient Neural Architecture Search via Parameter Sharing\"" - } - repositories: { - url: "https://github.com/cshannonn/blackscholes_nas" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "Can a neural network learn Black Scholes, yes..." - } - repositories: { - url: "https://github.com/ahundt/enas" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 12 - description: "TensorFlow code for paper \"Training Frankenstein's Creature to Stack: HyperTree Architecture Search\"" - } - methods: { - name: "Tanh Activation" - full_name: "Tanh Activation" - description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "LSTM" - full_name: "Long Short-Term Memory" - description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. 
Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" - } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - } - video: { - video_id: "fbCcJaSQPPA" - video_title: "PR-069: Efficient Neural Architecture Search via Parameter Sharing" - number_of_likes: 44 - number_of_views: 4295 - published_date: { - seconds: 1520088191 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 70 - value: { - papers: { - paper_id: "safetynets-verifiable-execution-of-deep" - title: "SafetyNets: Verifiable Execution of Deep Neural Networks on an Untrusted Cloud" - arxiv_id: "1706.10268" - abstract: "Inference using deep neural networks is often outsourced to the cloud since\nit is a computationally demanding task. However, this raises a fundamental\nissue of trust. How can a client be sure that the cloud has performed inference\ncorrectly? A lazy cloud provider might use a simpler but less accurate model to\nreduce its own computational load, or worse, maliciously modify the inference\nresults sent to the client. We propose SafetyNets, a framework that enables an\nuntrusted server (the cloud) to provide a client with a short mathematical\nproof of the correctness of inference tasks that they perform on behalf of the\nclient. Specifically, SafetyNets develops and implements a specialized\ninteractive proof (IP) protocol for verifiable execution of a class of deep\nneural networks, i.e., those that can be represented as arithmetic circuits.\nOur empirical results on three- and four-layer deep neural networks demonstrate\nthe run-time costs of SafetyNets for both the client and server are low.\nSafetyNets detects any incorrect computations of the neural network by the\nuntrusted server with high probability, while achieving state-of-the-art\naccuracy on the MNIST digit recognition (99.4%) and TIMIT speech recognition\ntasks (75.22%)." - pub_date: { - seconds: 1498780800 - } - authors: "Zahra Ghodsi" - authors: "Tianyu Gu" - authors: "Siddharth Garg" - } - video: { - video_id: "CtaPFqq8P00" - video_title: "PR-070: SafetyNets: Verifiable Execution of Deep Neural Networks on an Untrusted Cloud" - number_of_likes: 2 - number_of_views: 447 - published_date: { - seconds: 1520171150 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 71 - value: { - papers: { - paper_id: "categorical-reparameterization-with-gumbel" - title: "Categorical Reparameterization with Gumbel-Softmax" - arxiv_id: "1611.01144" - abstract: "Categorical variables are a natural choice for representing discrete\nstructure in the world. However, stochastic neural networks rarely use\ncategorical latent variables due to the inability to backpropagate through\nsamples. 
In this work, we present an efficient gradient estimator that replaces\nthe non-differentiable sample from a categorical distribution with a\ndifferentiable sample from a novel Gumbel-Softmax distribution. This\ndistribution has the essential property that it can be smoothly annealed into a\ncategorical distribution. We show that our Gumbel-Softmax estimator outperforms\nstate-of-the-art gradient estimators on structured output prediction and\nunsupervised generative modeling tasks with categorical latent variables, and\nenables large speedups on semi-supervised classification." - pub_date: { - seconds: 1478131200 - } - authors: "Eric Jang" - authors: "Shixiang Gu" - authors: "Ben Poole" - repositories: { - url: "https://github.com/karpathy/deep-vector-quantization" - framework: FRAMEWORK_PYTORCH - number_of_stars: 241 - description: "VQVAEs, GumbelSoftmaxes and friends" - } - repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/rebar" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70334 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/Jmkernes/PAR-Transformer-XL" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "An implementation of the Pay Attention when Required transformer: https://arxiv.org/pdf/2009.04534.pdf" - } - repositories: { - url: "https://github.com/EddieCunningham/GraphLSSM" - framework: FRAMEWORK_OTHERS - number_of_stars: 5 - description: "Graphical Latent State Space Models" - } - repositories: { - url: "https://github.com/stefanthaler/gumbel-softmax-exploration" - framework: FRAMEWORK_TENSORFLOW - description: "Exploration of the Gumbel Softmax Paper https://arxiv.org/pdf/1611.01144.pdf" - } - repositories: { - url: "https://github.com/kampta/pytorch-distributions" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Basic VAE flow using pytorch distributions" - } - repositories: { - url: "https://github.com/OlivierAlgoet/Tensorflow2-GMM" - framework: FRAMEWORK_TENSORFLOW - description: "Gaussian mixture model" - } - repositories: { - url: "https://github.com/tensorflow/models" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70333 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/GuyLor/direct_vae" - framework: FRAMEWORK_PYTORCH - number_of_stars: 12 - description: "Implementation of the paper \"Direct Optimization through argmax for discrete Variational Auto-Encoder\"" - } - repositories: { - url: "https://github.com/crowdflowTUe/stampnet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "code for \"StampNet: unsupervised multi-class object discovery\" by Visser, Corbetta, Menkovski and Toschi (https://arxiv.org/abs/1902.02693)" - } - methods: { - name: "Gumbel Softmax" - full_name: "Gumbel Softmax" - description: "**Gumbel-Softmax** is a continuous distribution that has the property that it can be smoothly annealed into a categorical distribution, and whose parameter gradients can be easily computed via the reparameterization trick." 
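To make the Gumbel-Softmax entry above concrete, here is a minimal Go sketch of drawing one relaxed categorical sample: Gumbel(0,1) noise of the form -log(-log(u)) is added to the log-probabilities and a temperature-scaled softmax is applied, so the sample approaches a one-hot vector as the temperature goes to zero. The function name and signature are assumptions made for this example.

package main

import (
	"fmt"
	"math"
	"math/rand"
)

// gumbelSoftmaxSample draws one relaxed categorical sample from the given
// log-probabilities at the given temperature.
func gumbelSoftmaxSample(logProbs []float64, temperature float64, rng *rand.Rand) []float64 {
	y := make([]float64, len(logProbs))
	maxV := math.Inf(-1)
	for i, lp := range logProbs {
		u := rng.Float64()
		if u == 0 {
			u = math.SmallestNonzeroFloat64 // keep log(u) finite
		}
		g := -math.Log(-math.Log(u)) // Gumbel(0,1) noise
		y[i] = (lp + g) / temperature
		if y[i] > maxV {
			maxV = y[i]
		}
	}
	var sum float64
	for i := range y {
		y[i] = math.Exp(y[i] - maxV)
		sum += y[i]
	}
	for i := range y {
		y[i] /= sum
	}
	return y
}

func main() {
	rng := rand.New(rand.NewSource(42))
	logProbs := []float64{math.Log(0.7), math.Log(0.2), math.Log(0.1)}
	fmt.Println(gumbelSoftmaxSample(logProbs, 0.5, rng))
}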
- } - } - video: { - video_id: "ty3SciyoIyk" - video_title: "PR-071: Categorical Reparameterization with Gumbel Softmax" - number_of_likes: 41 - number_of_views: 4262 - published_date: { - seconds: 1520172922 - } - uploader: "Jaejun Yoo" - } - } -} -pr_id_to_video: { - key: 72 - value: { - papers: { - paper_id: "deep-compression-compressing-deep-neural" - title: "Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding" - arxiv_id: "1510.00149" - abstract: "Neural networks are both computationally intensive and memory intensive,\nmaking them difficult to deploy on embedded systems with limited hardware\nresources. To address this limitation, we introduce \"deep compression\", a three\nstage pipeline: pruning, trained quantization and Huffman coding, that work\ntogether to reduce the storage requirement of neural networks by 35x to 49x\nwithout affecting their accuracy. Our method first prunes the network by\nlearning only the important connections. Next, we quantize the weights to\nenforce weight sharing, finally, we apply Huffman coding. After the first two\nsteps we retrain the network to fine tune the remaining connections and the\nquantized centroids. Pruning, reduces the number of connections by 9x to 13x;\nQuantization then reduces the number of bits that represent each connection\nfrom 32 to 5. On the ImageNet dataset, our method reduced the storage required\nby AlexNet by 35x, from 240MB to 6.9MB, without loss of accuracy. Our method\nreduced the size of VGG-16 by 49x from 552MB to 11.3MB, again with no loss of\naccuracy. This allows fitting the model into on-chip SRAM cache rather than\noff-chip DRAM memory. Our compression method also facilitates the use of\ncomplex neural networks in mobile applications where application size and\ndownload bandwidth are constrained. Benchmarked on CPU, GPU and mobile GPU,\ncompressed network has 3x to 4x layerwise speedup and 3x to 7x better energy\nefficiency." - pub_date: { - seconds: 1443657600 - } - authors: "Song Han" - authors: "Huizi Mao" - authors: "William J. Dally" - repositories: { - url: "https://github.com/songhan/Deep-Compression-AlexNet" - framework: FRAMEWORK_OTHERS - number_of_stars: 571 - description: "Deep Compression on AlexNet" - } - repositories: { - url: "https://github.com/heguixiang/caffe_deep_compression" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/may0324/DeepCompression-caffe" - framework: FRAMEWORK_OTHERS - number_of_stars: 234 - description: "Caffe for Deep Compression" - } - repositories: { - url: "https://github.com/bemova/Deep-Compression-Compressing-Deep-Neural-Networks-with-Pruning-Trained-Quantization-and-Huffman" - framework: FRAMEWORK_PYTORCH - number_of_stars: 14 - description: "It is a pytorch implementation of https://arxiv.org/abs/1510.00149 paper." - } - repositories: { - url: "https://github.com/songhan/SqueezeNet-Deep-Compression" - framework: FRAMEWORK_OTHERS - number_of_stars: 398 - } - repositories: { - url: "https://github.com/isha-garg/Deep_Compression" - framework: FRAMEWORK_OTHERS - number_of_stars: 8 - description: "Recreated deep compression's pruning, quantization and huffman encoding pipeline" - } - repositories: { - url: "https://github.com/KarenUllrich/Tutorial_BayesianCompressionForDL" - framework: FRAMEWORK_PYTORCH - number_of_stars: 183 - description: "A tutorial on \"Bayesian Compression for Deep Learning\" published at NIPS (2017)." 
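The Deep Compression abstract above describes a three-stage pipeline of pruning, trained quantization and Huffman coding. The sketch below illustrates only the first stage as a simple magnitude threshold on a weight vector; it is a toy illustration of the idea rather than the paper's trained pruning procedure, and pruneByMagnitude is an invented name.

package main

import (
	"fmt"
	"math"
)

// pruneByMagnitude zeroes every weight whose absolute value falls below the
// threshold and reports how many connections survive.
func pruneByMagnitude(weights []float64, threshold float64) int {
	kept := 0
	for i, w := range weights {
		if math.Abs(w) < threshold {
			weights[i] = 0
		} else {
			kept++
		}
	}
	return kept
}

func main() {
	w := []float64{0.01, -0.4, 0.002, 0.9, -0.05}
	kept := pruneByMagnitude(w, 0.05)
	fmt.Println(w, kept) // [0 -0.4 0 0.9 -0.05] 3
}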
- } - repositories: { - url: "https://github.com/cambridge-mlg/variational-shannon-coding" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 15 - description: "This repository contains the code for our recent paper `Minimal Random Code Learning: Getting Bits Back from Compressed Model Parameters'" - } - repositories: { - url: "https://github.com/cambridge-mlg/miracle" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 15 - description: "This repository contains the code for our recent paper `Minimal Random Code Learning: Getting Bits Back from Compressed Model Parameters'" - } - repositories: { - url: "https://github.com/lovepan1/caffe_ssd_traffic" - framework: FRAMEWORK_OTHERS - description: " Updated a minute ago used ssd by caffe in transportation object detection , included car bus minbus persopn minibus bicycle." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "AlexNet" - full_name: "AlexNet" - description: "**AlexNet** is a classic convolutional neural network architecture. It consists of convolutions, max pooling and dense layers as the basic building blocks. Grouped convolutions are used in order to fit the model across two GPUs." - } - methods: { - name: "VGG-16" - full_name: "VGG-16" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Local Response Normalization" - full_name: "Local Response Normalization" - description: "**Local Response Normalization** is a normalization layer that implements the idea of lateral inhibition. Lateral inhibition is a concept in neurobiology that refers to the phenomenon of an excited neuron inhibiting its neighbours: this leads to a peak in the form of a local maximum, creating contrast in that area and increasing sensory perception. 
In practice, we can either normalize within the same channel or normalize across channels when we apply LRN to convolutional neural networks.\r\n\r\n$$ b_{c} = a_{c}\\left(k + \\frac{\\alpha}{n}\\sum_{c'=\\max(0, c-n/2)}^{\\min(N-1,c+n/2)}a_{c'}^2\\right)^{-\\beta} $$\r\n\r\nWhere the size is the number of neighbouring channels used for normalization, $\\alpha$ is multiplicative factor, $\\beta$ an exponent and $k$ an additive factor" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." 
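The ReLU and Dropout entries above are simple enough to write out directly. In the Go sketch below, drop is the probability of zeroing a unit at training time, so test-time activations are scaled by the keep probability 1-drop; this follows the classic (non-inverted) formulation, and the helper names are illustrative.

package main

import (
	"fmt"
	"math/rand"
)

// relu is linear for positive inputs and zero otherwise.
func relu(x float64) float64 {
	if x > 0 {
		return x
	}
	return 0
}

// dropoutTrain zeroes each activation with probability drop at training time.
func dropoutTrain(a []float64, drop float64, rng *rand.Rand) {
	for i := range a {
		if rng.Float64() < drop {
			a[i] = 0
		}
	}
}

// dropoutTest keeps all units but scales them by the keep probability.
func dropoutTest(a []float64, drop float64) {
	for i := range a {
		a[i] *= 1 - drop
	}
}

func main() {
	rng := rand.New(rand.NewSource(7))
	a := []float64{relu(-1.5), relu(0.3), relu(2.0), relu(-0.1)}
	dropoutTrain(a, 0.5, rng)
	fmt.Println(a)
}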
- } - } - video: { - video_id: "9mFZmpIbMDs" - video_title: "PR-072: Deep Compression" - number_of_likes: 27 - number_of_views: 2163 - published_date: { - seconds: 1520777304 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 73 - value: { - papers: { - paper_id: "generative-semantic-manipulation-with" - title: "Generative Semantic Manipulation with Contrasting GAN" - arxiv_id: "1708.00315" - abstract: "Generative Adversarial Networks (GANs) have recently achieved significant\nimprovement on paired/unpaired image-to-image translation, such as\nphoto$\\rightarrow$ sketch and artist painting style transfer. However, existing\nmodels can only be capable of transferring the low-level information (e.g.\ncolor or texture changes), but fail to edit high-level semantic meanings (e.g.,\ngeometric structure or content) of objects. On the other hand, while some\nresearches can synthesize compelling real-world images given a class label or\ncaption, they cannot condition on arbitrary shapes or structures, which largely\nlimits their application scenarios and interpretive capability of model\nresults. In this work, we focus on a more challenging semantic manipulation\ntask, which aims to modify the semantic meaning of an object while preserving\nits own characteristics (e.g. viewpoints and shapes), such as\ncow$\\rightarrow$sheep, motor$\\rightarrow$ bicycle, cat$\\rightarrow$dog. To\ntackle such large semantic changes, we introduce a contrasting GAN\n(contrast-GAN) with a novel adversarial contrasting objective. Instead of\ndirectly making the synthesized samples close to target data as previous GANs\ndid, our adversarial contrasting objective optimizes over the distance\ncomparisons between samples, that is, enforcing the manipulated data be\nsemantically closer to the real data with target category than the input data.\nEquipped with the new contrasting objective, a novel mask-conditional\ncontrast-GAN architecture is proposed to enable disentangle image background\nwith object semantic changes. Experiments on several semantic manipulation\ntasks on ImageNet and MSCOCO dataset show considerable performance gain by our\ncontrast-GAN over other conditional GANs. Quantitative results further\ndemonstrate the superiority of our model on generating manipulated results with\nhigh visual fidelity and reasonable object semantics." - pub_date: { - seconds: 1501545600 - } - authors: "Xiaodan Liang" - authors: "Hao Zhang" - authors: "Eric P. Xing" - } - video: { - video_id: "U8IpNf1b57w" - video_title: "PR-073: Generative Semantic Manipulation with Contrasting GAN" - number_of_likes: 4 - number_of_views: 757 - published_date: { - seconds: 1520778031 - } - uploader: "이광희" - } - } -} -pr_id_to_video: { - key: 74 - value: { - papers: { - paper_id: "obamanet-photo-realistic-lip-sync-from-text" - title: "ObamaNet: Photo-realistic lip-sync from text" - arxiv_id: "1801.01442" - abstract: "We present ObamaNet, the first architecture that generates both audio and\nsynchronized photo-realistic lip-sync videos from any new text. Contrary to\nother published lip-sync approaches, ours is only composed of fully trainable\nneural modules and does not rely on any traditional computer graphics methods.\nMore precisely, we use three main modules: a text-to-speech network based on\nChar2Wav, a time-delayed LSTM to generate mouth-keypoints synced to the audio,\nand a network based on Pix2Pix to generate the video frames conditioned on the\nkeypoints." 
- pub_date: { - seconds: 1512518400 - } - authors: "Rithesh Kumar" - authors: "Jose Sotelo" - authors: "Kundan Kumar" - authors: "Alexandre de Brebisson" - authors: "Yoshua Bengio" - repositories: { - url: "https://github.com/ung200/thats-what-obama-said" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18 - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "Tanh Activation" - full_name: "Tanh Activation" - description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. 
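The Batch Normalization entry above lists the mini-batch equations explicitly; the following Go sketch applies them to a batch of scalar activations for a single feature. batchNorm, gamma, beta and eps are illustrative names, and the running statistics used at inference time are omitted to keep the example short.

package main

import (
	"fmt"
	"math"
)

// batchNorm normalizes a mini-batch to zero mean and unit variance, then
// applies the learnable scale gamma and shift beta, as in the formulas above.
func batchNorm(x []float64, gamma, beta, eps float64) []float64 {
	m := float64(len(x))
	var mean float64
	for _, v := range x {
		mean += v
	}
	mean /= m
	var variance float64
	for _, v := range x {
		variance += (v - mean) * (v - mean)
	}
	variance /= m
	out := make([]float64, len(x))
	for i, v := range x {
		xhat := (v - mean) / math.Sqrt(variance+eps)
		out[i] = gamma*xhat + beta
	}
	return out
}

func main() {
	fmt.Println(batchNorm([]float64{1, 2, 3, 4}, 1.0, 0.0, 1e-5))
}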
But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" - } - methods: { - name: "Pix2Pix" - full_name: "Pix2Pix" - description: "**Pix2Pix** is a conditional image-to-image translation architecture that uses a conditional GAN objective combined with a reconstruction loss. The conditional GAN objective for observed images $x$, output images $y$ and the random noise vector $z$ is:\r\n\r\n$$ \\mathcal{L}\\_{cGAN}\\left(G, D\\right) =\\mathbb{E}\\_{x,y}\\left[\\log D\\left(x, y\\right)\\right]+\r\n\\mathbb{E}\\_{x,z}\\left[log(1 − D\\left(x, G\\left(x, z\\right)\\right)\\right] $$\r\n\r\nWe augment this with a reconstruction term:\r\n\r\n$$ \\mathcal{L}\\_{L1}\\left(G\\right) = \\mathbb{E}\\_{x,y,z}\\left[||y - G\\left(x, z\\right)||\\_{1}\\right] $$\r\n\r\nand we get the final objective as:\r\n\r\n$$ G^{*} = \\arg\\min\\_{G}\\max\\_{D}\\mathcal{L}\\_{cGAN}\\left(G, D\\right) + \\lambda\\mathcal{L}\\_{L1}\\left(G\\right) $$\r\n\r\nThe architectures employed for the generator and discriminator closely follow [DCGAN](https://paperswithcode.com/method/dcgan), with a few modifications:\r\n\r\n- Concatenated skip connections are used to \"shuttle\" low-level information between the input and output, similar to a [U-Net](https://paperswithcode.com/method/u-net).\r\n- The use of a PatchGAN discriminator that only penalizes structure at the scale of patches." - } - methods: { - name: "LSTM" - full_name: "Long Short-Term Memory" - description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" - } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - methods: { - name: "Leaky ReLU" - full_name: "Leaky ReLU" - description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we we may suffer from sparse gradients, for example training generative adversarial networks." - } - methods: { - name: "PatchGAN" - full_name: "PatchGAN" - description: "**PatchGAN** is a type of discriminator for generative adversarial networks which only penalizes structure at the scale of local image patches. 
The PatchGAN discriminator tries to classify if each $N \\times N$ patch in an image is real or fake. This discriminator is run convolutionally across the image, averaging all responses to provide the ultimate output of $D$. Such a discriminator effectively models the image as a Markov random field, assuming independence between pixels separated by more than a patch diameter. It can be understood as a type of texture/style loss." - } - methods: { - name: "Concatenated Skip Connection" - full_name: "Concatenated Skip Connection" - description: "A **Concatenated Skip Connection** is a type of skip connection that seeks to reuse features by concatenating them to new layers, allowing more information to be retained from previous layers of the network. This contrasts with say, residual connections, where element-wise summation is used instead to incorporate information from previous layers. This type of skip connection is prominently used in DenseNets (and also Inception networks), which the Figure to the right illustrates." - } - } - video: { - video_id: "A1o6SUsWd98" - video_title: "PR-074: ObamaNet: Photo-realistic lip-sync from text" - number_of_views: 2008 - published_date: { - seconds: 1521381942 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 75 - value: { - papers: { - paper_id: "on-calibration-of-modern-neural-networks" - title: "On Calibration of Modern Neural Networks" - arxiv_id: "1706.04599" - abstract: "Confidence calibration -- the problem of predicting probability estimates\nrepresentative of the true correctness likelihood -- is important for\nclassification models in many applications. We discover that modern neural\nnetworks, unlike those from a decade ago, are poorly calibrated. Through\nextensive experiments, we observe that depth, width, weight decay, and Batch\nNormalization are important factors influencing calibration. We evaluate the\nperformance of various post-processing calibration methods on state-of-the-art\narchitectures with image and document classification datasets. Our analysis and\nexperiments not only offer insights into neural network learning, but also\nprovide a simple and straightforward recipe for practical settings: on most\ndatasets, temperature scaling -- a single-parameter variant of Platt Scaling --\nis surprisingly effective at calibrating predictions." - pub_date: { - seconds: 1497398400 - } - authors: "Chuan Guo" - authors: "Geoff Pleiss" - authors: "Yu Sun" - authors: "Kilian Q. Weinberger" - repositories: { - url: "https://github.com/sleep3r/garrus" - framework: FRAMEWORK_OTHERS - number_of_stars: 13 - description: "Python framework for high quality confidence estimation of deep neural networks, providing methods such as confidence calibration and ordinal ranking" - } - repositories: { - url: "https://github.com/bayesgroup/pytorch-ensembles" - framework: FRAMEWORK_PYTORCH - number_of_stars: 140 - description: "Pitfalls of In-Domain Uncertainty Estimation and Ensembling in Deep Learning, ICLR 2020" - } - repositories: { - url: "https://github.com/artnitolog/diary" - framework: FRAMEWORK_OTHERS - description: "Accompanying repository for the 3rd year corsework. CMC MSU, MMF, 2020-2021." - } - repositories: { - url: "https://github.com/johntd54/stanford_car" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "Classification model for fine-grained visual classification on the Stanford Car dataset." 
- } - repositories: { - is_official: true - url: "https://github.com/gpleiss/temperature_scaling" - framework: FRAMEWORK_PYTORCH - number_of_stars: 550 - description: "A simple way to calibrate your neural network." - } - repositories: { - url: "https://github.com/AnanyaKumar/verified_calibration" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 58 - description: "Calibration library and code for the paper: Verified Uncertainty Calibration. Ananya Kumar, Percy Liang, Tengyu Ma. NeurIPS 2019 (Spotlight)." - } - repositories: { - url: "https://github.com/Andreas12321/Est-Cert-Final" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/Jonathan-Pearce/calibration_library" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Pytorch library for model calibration metrics and visualizations as well as recalibration methods. In progress!" - } - repositories: { - url: "https://github.com/Eric-Wallace/deep-knn" - framework: FRAMEWORK_PYTORCH - number_of_stars: 32 - description: "Code for the 2018 EMNLP Interpretability Workshop Paper \"Interpreting Neural Networks with Nearest Neighbors\"" - } - repositories: { - url: "https://github.com/Jonathan-Pearce/cnn_calibration" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Pytorch library for model calibration metrics and visualizations as well as recalibration methods. In progress!" - } - } - video: { - video_id: "odNHEkfJAc4" - video_title: "PR-075: On Calibration of Modern Neural Networks (2017)" - number_of_likes: 27 - number_of_views: 2882 - published_date: { - seconds: 1521987100 - } - uploader: "Terry TaeWoong Um" - } - } -} -pr_id_to_video: { - key: 76 - value: { - papers: { - paper_id: "distributed-representations-of-sentences-and" - title: "Distributed Representations of Sentences and Documents" - arxiv_id: "1405.4053" - abstract: "Many machine learning algorithms require the input to be represented as a\nfixed-length feature vector. When it comes to texts, one of the most common\nfixed-length features is bag-of-words. Despite their popularity, bag-of-words\nfeatures have two major weaknesses: they lose the ordering of the words and\nthey also ignore semantics of the words. For example, \"powerful,\" \"strong\" and\n\"Paris\" are equally distant. In this paper, we propose Paragraph Vector, an\nunsupervised algorithm that learns fixed-length feature representations from\nvariable-length pieces of texts, such as sentences, paragraphs, and documents.\nOur algorithm represents each document by a dense vector which is trained to\npredict words in the document. Its construction gives our algorithm the\npotential to overcome the weaknesses of bag-of-words models. Empirical results\nshow that Paragraph Vectors outperform bag-of-words models as well as other\ntechniques for text representations. Finally, we achieve new state-of-the-art\nresults on several text classification and sentiment analysis tasks." - pub_date: { - seconds: 1400198400 - } - authors: "Quoc V. 
Le" - authors: "Tomas Mikolov" - repositories: { - url: "https://github.com/Antonildo43/Classifica-o-de-textos-com-doc2Vec" - framework: FRAMEWORK_OTHERS - description: "Classificação de Documentos com doc2Vec" - } - repositories: { - url: "https://github.com/jimmy6727/Informd" - framework: FRAMEWORK_TENSORFLOW - description: "Project repo for Mozilla Spring Incubator Lab 2020 Project " - } - repositories: { - url: "https://github.com/wiflore/IBM_Articles_Recomender" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/hithisisdhara/doc2vec" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/kr900910/supreme_court_opinion" - framework: FRAMEWORK_TENSORFLOW - description: "Predicting party of author for different supreme court opinions based on natural language features" - } - repositories: { - url: "https://github.com/dhyeon/ingredient-vectors" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/kinimod23/NMT_Project" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/ibrahimsharaf/doc2vec" - framework: FRAMEWORK_OTHERS - number_of_stars: 93 - description: ":notebook: Long(er) text representation and classification using Doc2Vec embeddings" - } - repositories: { - url: "https://github.com/tsandefer/capstone_2" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "Doc2Vec and Annotated Lyrics: Are they \"Genius\"? (DSI Capstone II Project)" - } - repositories: { - url: "https://github.com/bombdiggity/paper-bag" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - } - video: { - video_id: "NxKpgY6sWOQ" - video_title: "PR-076: Distributed Representations of Sentences and Documents" - number_of_likes: 18 - number_of_views: 1744 - published_date: { - seconds: 1522587607 - } - uploader: "keun bong Kwak" - } - } -} -pr_id_to_video: { - key: 77 - value: { - papers: { - paper_id: "seqgan-sequence-generative-adversarial-nets" - title: "SeqGAN: Sequence Generative Adversarial Nets with Policy Gradient" - arxiv_id: "1609.05473" - abstract: "As a new way of training generative models, Generative Adversarial Nets (GAN)\nthat uses a discriminative model to guide the training of the generative model\nhas enjoyed considerable success in generating real-valued data. However, it\nhas limitations when the goal is for generating sequences of discrete tokens. A\nmajor reason lies in that the discrete outputs from the generative model make\nit difficult to pass the gradient update from the discriminative model to the\ngenerative model. Also, the discriminative model can only assess a complete\nsequence, while for a partially generated sequence, it is non-trivial to\nbalance its current score and the future one once the entire sequence has been\ngenerated. In this paper, we propose a sequence generation framework, called\nSeqGAN, to solve the problems. Modeling the data generator as a stochastic\npolicy in reinforcement learning (RL), SeqGAN bypasses the generator\ndifferentiation problem by directly performing gradient policy update. The RL\nreward signal comes from the GAN discriminator judged on a complete sequence,\nand is passed back to the intermediate state-action steps using Monte Carlo\nsearch. Extensive experiments on synthetic data and real-world tasks\ndemonstrate significant improvements over strong baselines." 
- pub_date: { - seconds: 1474156800 - } - authors: "Lantao Yu" - authors: "Weinan Zhang" - authors: "Jun Wang" - authors: "Yong Yu" - repositories: { - url: "https://github.com/lina2360/HiSeqGan" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/willspag/SeqGan" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Attempt at Tensorflow 2.3 version of Sequence Gan" - } - repositories: { - url: "https://github.com/desire2020/RankGAN" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 24 - description: "Implementation of Adversarial Ranking for Language Generation [ArxiV 1705.11001]" - } - repositories: { - url: "https://github.com/medtray/SeqGAN-vs-MLE-vs-PG-BLEU-vs-ScheduleSampling" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/chaneeh/SeqGAN_experiment" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/LiangqunLu/DLForChatbot" - framework: FRAMEWORK_OTHERS - description: "Deep learning for chatbot" - } - repositories: { - url: "https://github.com/yuanfeisiyuetian/seqgan-modbusTCP" - framework: FRAMEWORK_TENSORFLOW - description: "使用seqgan进行ModbusTCP协议的模糊测试" - } - repositories: { - is_official: true - url: "https://github.com/LantaoYu/SeqGAN" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1915 - description: "Implementation of Sequence Generative Adversarial Nets with Policy Gradient" - } - repositories: { - url: "https://github.com/suhoy901/SeqGAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "pytorch SeqGAN" - } - repositories: { - url: "https://github.com/bgenchel/MusicalSeqGAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Adapt and evaluate SeqGAN for music. Developed in PyTorch, using https://github.com/ZiJianZhao/SeqGAN-PyTorch as a base" - } - methods: { - name: "GAN" - full_name: "Generative Adversarial Network" - description: "A **GAN**, or **Generative Adversarial Network**, is a generative model that simultaneously trains\r\ntwo models: a generative model $G$ that captures the data distribution, and a discriminative model $D$ that estimates the\r\nprobability that a sample came from the training data rather than $G$.\r\n\r\nThe training procedure for $G$ is to maximize the probability of $D$ making\r\na mistake. This framework corresponds to a minimax two-player game. In the\r\nspace of arbitrary functions $G$ and $D$, a unique solution exists, with $G$\r\nrecovering the training data distribution and $D$ equal to $\\frac{1}{2}$\r\neverywhere. In the case where $G$ and $D$ are defined by multilayer perceptrons,\r\nthe entire system can be trained with backpropagation. 
\r\n\r\n(Image Source: [here](http://www.kdnuggets.com/2017/01/generative-adversarial-networks-hot-topic-machine-learning.html))" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - } - video: { - video_id: "BXODIP3QjJI" - video_title: "PR-077: SeqGAN: Sequence Generative Adversarial Nets with Policy Gradient" - number_of_likes: 15 - number_of_views: 2084 - published_date: { - seconds: 1523239176 - } - uploader: "Suh Kiho" - } - } -} -pr_id_to_video: { - key: 78 - value: { - papers: { - paper_id: "net2net-accelerating-learning-via-knowledge" - title: "Net2Net: Accelerating Learning via Knowledge Transfer" - arxiv_id: "1511.05641" - abstract: "We introduce techniques for rapidly transferring the information stored in\none neural net into another neural net. The main purpose is to accelerate the\ntraining of a significantly larger neural net. During real-world workflows, one\noften trains very many different neural networks during the experimentation and\ndesign process. This is a wasteful process in which each new model is trained\nfrom scratch. Our Net2Net technique accelerates the experimentation process by\ninstantaneously transferring the knowledge from a previous network to each new\ndeeper or wider network. Our techniques are based on the concept of\nfunction-preserving transformations between neural network specifications. This\ndiffers from previous approaches to pre-training that altered the function\nrepresented by a neural net when adding layers to it. Using our knowledge\ntransfer mechanism to add depth to Inception modules, we demonstrate a new\nstate of the art accuracy rating on the ImageNet dataset." - pub_date: { - seconds: 1447804800 - } - authors: "Tianqi Chen" - authors: "Ian Goodfellow" - authors: "Jonathon Shlens" - repositories: { - url: "https://github.com/hxtruong/net2net" - framework: FRAMEWORK_TENSORFLOW - description: "Library to increasing size of model. Wider and Deeper any layer of model." - } - repositories: { - url: "https://github.com/agongt408/vbranch" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "vbranch is a deep-learning framework to improve the accuracy and efficiency of neural networks by sharing parameters between multiple branches." - } - repositories: { - url: "https://github.com/soumith/net2net.torch" - framework: FRAMEWORK_OTHERS - number_of_stars: 153 - description: "Implementation of http://arxiv.org/abs/1511.05641 that lets one build a larger net starting from a smaller one." 
- } - } - video: { - video_id: "btsZOMsyH_o" - video_title: "PR-078: Net2Net: Accelerating Learning via Knowledge Transfer" - number_of_likes: 14 - number_of_views: 1001 - published_date: { - seconds: 1523878774 - } - uploader: "차준범" - } - } -} -pr_id_to_video: { - key: 79 - value: { - papers: { - paper_id: "adversarial-audio-synthesis" - title: "Adversarial Audio Synthesis" - arxiv_id: "1802.04208" - abstract: "Audio signals are sampled at high temporal resolutions, and learning to\nsynthesize audio requires capturing structure across a range of timescales.\nGenerative adversarial networks (GANs) have seen wide success at generating\nimages that are both locally and globally coherent, but they have seen little\napplication to audio generation. In this paper we introduce WaveGAN, a first\nattempt at applying GANs to unsupervised synthesis of raw-waveform audio.\nWaveGAN is capable of synthesizing one second slices of audio waveforms with\nglobal coherence, suitable for sound effect generation. Our experiments\ndemonstrate that, without labels, WaveGAN learns to produce intelligible words\nwhen trained on a small-vocabulary speech dataset, and can also synthesize\naudio from other domains such as drums, bird vocalizations, and piano. We\ncompare WaveGAN to a method which applies GANs designed for image generation on\nimage-like audio feature representations, finding both approaches to be\npromising." - pub_date: { - seconds: 1518393600 - } - authors: "Chris Donahue" - authors: "Julian McAuley" - authors: "Miller Puckette" - repositories: { - url: "https://github.com/zassou65535/WaveGAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "WaveGANによる音声生成器" - } - repositories: { - url: "https://github.com/mahotani/ADVERSARIAL-AUDIO-SYNTHESIS" - framework: FRAMEWORK_OTHERS - description: "ICLR2019で採択されたADVERSARIAL AUDIO SYNTHESISを読んだメモ的なもの" - } - repositories: { - url: "https://github.com/MaxHolmberg96/WaveGAN" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Implementation of the paper https://arxiv.org/pdf/1802.04208.pdf" - } - repositories: { - url: "https://github.com/Yotsuyubi/wave-nr-gan" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/fromme0528/pytorch-WaveGAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "A pytorch implementation of WaveGAN" - } - repositories: { - url: "https://github.com/MurreyCode/wavegan" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "WaveGAN: using GANs to synthesize raw audio" - } - repositories: { - url: "https://github.com/IBM/MAX-Audio-Sample-Generator" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 - description: "Generate short audio clips of speech commands and lo-fi instrumental samples" - } - repositories: { - url: "https://github.com/LEChaney/AudioStyleGAN" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/ShaunBarry/wavegan" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/mostafaelaraby/wavegan-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 43 - description: "Pytorch Implementation of wavegan model to generate audio " - } - methods: { - name: "Leaky ReLU" - full_name: "Leaky ReLU" - description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for 
negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we we may suffer from sparse gradients, for example training generative adversarial networks." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "DCGAN" - full_name: "Deep Convolutional GAN" - description: "**DCGAN**, or **Deep Convolutional GAN**, is a generative adversarial network architecture. It uses a couple of guidelines, in particular:\r\n\r\n- Replacing any pooling layers with strided convolutions (discriminator) and fractional-strided convolutions (generator).\r\n- Using batchnorm in both the generator and the discriminator.\r\n- Removing fully connected hidden layers for deeper architectures.\r\n- Using ReLU activation in generator for all layers except for the output, which uses tanh.\r\n- Using LeakyReLU activation in the discriminator for all layer." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "WGAN-GP Loss" - full_name: "WGAN-GP Loss" - description: "**Wasserstein Gradient Penalty Loss**, or **WGAN-GP Loss**, is a loss used for generative adversarial networks that augments the Wasserstein loss with a gradient norm penalty for random samples $\\mathbf{\\hat{x}} \\sim \\mathbb{P}\\_{\\hat{\\mathbf{x}}}$ to achieve Lipschitz continuity:\r\n\r\n$$ L = \\mathbb{E}\\_{\\mathbf{\\hat{x}} \\sim \\mathbb{P}\\_{g}}\\left[D\\left(\\tilde{\\mathbf{x}}\\right)\\right] - \\mathbb{E}\\_{\\mathbf{x} \\sim \\mathbb{P}\\_{r}}\\left[D\\left(\\mathbf{x}\\right)\\right] + \\lambda\\mathbb{E}\\_{\\mathbf{\\hat{x}} \\sim \\mathbb{P}\\_{\\hat{\\mathbf{x}}}}\\left[\\left(||\\nabla\\_{\\tilde{\\mathbf{x}}}D\\left(\\mathbf{\\tilde{x}}\\right)||\\_{2}-1\\right)^{2}\\right]$$\r\n\r\nIt was introduced as part of the [WGAN-GP](https://paperswithcode.com/method/wgan-gp) overall model." - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. 
\r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - methods: { - name: "SpecGAN" - full_name: "SpecGAN" - description: "**SpecGAN** is a generative adversarial network method for spectrogram-based, frequency-domain audio generation. The problem is suited for GANs designed for image generation. The model can be approximately inverted. \r\n\r\nTo process audio into suitable spectrograms, the authors perform the short-time Fourier transform with 16 ms windows and 8ms stride, resulting in 128 frequency bins, linearly spaced from 0 to 8 kHz. They take the magnitude of the resultant spectra and scale amplitude values logarithmically to better-align with human perception. They then normalize each frequency bin to have zero mean and unit variance. They clip the spectra to $3$ standard deviations and rescale to $\\left[−1, 1\\right]$.\r\n\r\nThey then use the DCGAN approach on the result spectra." - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. 
Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - } - video: { - video_id: "UXVKSSXdwb8" - video_title: "PR-079: Synthesizing Audio with Generative Adversarial Networks" - number_of_likes: 20 - number_of_views: 1303 - published_date: { - seconds: 1523206394 - } - uploader: "Seungil Kim" - } - } -} -pr_id_to_video: { - key: 80 - value: { - papers: { - paper_id: "practical-bayesian-optimization-of-machine" - title: "Practical Bayesian Optimization of Machine Learning Algorithms" - arxiv_id: "1206.2944" - abstract: "Machine learning algorithms frequently require careful tuning of model\nhyperparameters, regularization terms, and optimization parameters.\nUnfortunately, this tuning is often a \"black art\" that requires expert\nexperience, unwritten rules of thumb, or sometimes brute-force search. Much\nmore appealing is the idea of developing automatic approaches which can\noptimize the performance of a given learning algorithm to the task at hand. In\nthis work, we consider the automatic tuning problem within the framework of\nBayesian optimization, in which a learning algorithm's generalization\nperformance is modeled as a sample from a Gaussian process (GP). The tractable\nposterior distribution induced by the GP leads to efficient use of the\ninformation gathered by previous experiments, enabling optimal choices about\nwhat parameters to try next. Here we show how the effects of the Gaussian\nprocess prior and the associated inference procedure can have a large impact on\nthe success or failure of Bayesian optimization. We show that thoughtful\nchoices can lead to results that exceed expert-level performance in tuning\nmachine learning algorithms. We also describe new algorithms that take into\naccount the variable cost (duration) of learning experiments and that can\nleverage the presence of multiple cores for parallel experimentation. We show\nthat these proposed algorithms improve on previous automatic procedures and can\nreach or surpass human expert-level optimization on a diverse set of\ncontemporary algorithms including latent Dirichlet allocation, structured SVMs\nand convolutional neural networks." - pub_date: { - seconds: 1339545600 - } - authors: "Jasper Snoek" - authors: "Hugo Larochelle" - authors: "Ryan P. Adams" - repositories: { - url: "https://github.com/c-bata/goptuna" - framework: FRAMEWORK_OTHERS - number_of_stars: 180 - description: "A hyperparameter optimization framework, inspired by Optuna." - } - repositories: { - url: "https://github.com/JasperSnoek/spearmint" - framework: FRAMEWORK_OTHERS - number_of_stars: 1362 - description: "Spearmint is a package to perform Bayesian optimization according to the algorithms outlined in the paper: Practical Bayesian Optimization of Machine Learning Algorithms. Jasper Snoek, Hugo Larochelle and Ryan P. Adams. 
Advances in Neural Information Processing Systems, 2012 " - } - repositories: { - url: "https://github.com/HIPS/Spearmint" - framework: FRAMEWORK_OTHERS - number_of_stars: 1430 - description: "Spearmint Bayesian optimization codebase" - } - repositories: { - url: "https://github.com/Argaadya/intro-bayesian" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "This is an introduction for Bayesian optimization" - } - methods: { - name: "Gaussian Process" - full_name: "Gaussian Process" - description: "**Gaussian Processes** are non-parametric models for approximating functions. They rely upon a measure of similarity between points (the kernel function) to predict the value for an unseen point from training data. The models are fully probabilistic so uncertainty bounds are baked in with the model.\r\n\r\nImage Source: Gaussian Processes for Machine Learning, C. E. Rasmussen & C. K. I. Williams" - } - } - video: { - video_id: "MnHCe8tGjQ8" - video_title: "PR-080: Practical Bayesian Optimization of Machine Learning Algorithms" - number_of_likes: 26 - number_of_views: 2377 - published_date: { - seconds: 1523799259 - } - uploader: "Jiyang Kang" - } - } -} -pr_id_to_video: { - key: 81 - value: { - papers: { - paper_id: "machine-theory-of-mind" - title: "Machine Theory of Mind" - arxiv_id: "1802.07740" - abstract: "Theory of mind (ToM; Premack & Woodruff, 1978) broadly refers to humans'\nability to represent the mental states of others, including their desires,\nbeliefs, and intentions. We propose to train a machine to build such models\ntoo. We design a Theory of Mind neural network -- a ToMnet -- which uses\nmeta-learning to build models of the agents it encounters, from observations of\ntheir behaviour alone. Through this process, it acquires a strong prior model\nfor agents' behaviour, as well as the ability to bootstrap to richer\npredictions about agents' characteristics and mental states using only a small\nnumber of behavioural observations. We apply the ToMnet to agents behaving in\nsimple gridworld environments, showing that it learns to model random,\nalgorithmic, and deep reinforcement learning agents from varied populations,\nand that it passes classic ToM tasks such as the \"Sally-Anne\" test (Wimmer &\nPerner, 1983; Baron-Cohen et al., 1985) of recognising that others can hold\nfalse beliefs about the world. We argue that this system -- which autonomously\nlearns how to model other agents in its world -- is an important step forward\nfor developing multi-agent AI systems, for building intermediating technology\nfor machine-human interaction, and for advancing the progress on interpretable\nAI." - pub_date: { - seconds: 1519171200 - } - authors: "Neil C. Rabinowitz" - authors: "Frank Perbet" - authors: "H. Francis Song" - authors: "Chiyuan Zhang" - authors: "S. M. Ali Eslami" - authors: "Matthew Botvinick" - } - video: {} - } -} -pr_id_to_video: { - key: 82 - value: { - papers: { - paper_id: "supervised-speech-separation-based-on-deep" - title: "Supervised Speech Separation Based on Deep Learning: An Overview" - arxiv_id: "1708.07524" - abstract: "Speech separation is the task of separating target speech from background\ninterference. Traditionally, speech separation is studied as a signal\nprocessing problem. A more recent approach formulates speech separation as a\nsupervised learning problem, where the discriminative patterns of speech,\nspeakers, and background noise are learned from training data. 
Over the past\ndecade, many supervised separation algorithms have been put forward. In\nparticular, the recent introduction of deep learning to supervised speech\nseparation has dramatically accelerated progress and boosted separation\nperformance. This article provides a comprehensive overview of the research on\ndeep learning based supervised speech separation in the last several years. We\nfirst introduce the background of speech separation and the formulation of\nsupervised separation. Then we discuss three main components of supervised\nseparation: learning machines, training targets, and acoustic features. Much of\nthe overview is on separation algorithms where we review monaural methods,\nincluding speech enhancement (speech-nonspeech separation), speaker separation\n(multi-talker separation), and speech dereverberation, as well as\nmulti-microphone techniques. The important issue of generalization, unique to\nsupervised learning, is discussed. This overview provides a historical\nperspective on how advances are made. In addition, we discuss a number of\nconceptual issues, including what constitutes the target source." - pub_date: { - seconds: 1503532800 - } - authors: "DeLiang Wang" - authors: "Jitong Chen" - } - video: { - video_id: "OgNSFKeHy8k" - video_title: "PR-082: Introduction to Speech Separation" - number_of_likes: 14 - number_of_views: 1210 - published_date: { - seconds: 1524410583 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 83 - value: { - papers: { - paper_id: "non-local-neural-networks" - title: "Non-local Neural Networks" - arxiv_id: "1711.07971" - abstract: "Both convolutional and recurrent operations are building blocks that process\none local neighborhood at a time. In this paper, we present non-local\noperations as a generic family of building blocks for capturing long-range\ndependencies. Inspired by the classical non-local means method in computer\nvision, our non-local operation computes the response at a position as a\nweighted sum of the features at all positions. This building block can be\nplugged into many computer vision architectures. On the task of video\nclassification, even without any bells and whistles, our non-local models can\ncompete or outperform current competition winners on both Kinetics and Charades\ndatasets. In static image recognition, our non-local models improve object\ndetection/segmentation and pose estimation on the COCO suite of tasks. Code is\navailable at https://github.com/facebookresearch/video-nonlocal-net ." 
- pub_date: { - seconds: 1511222400 - } - authors: "Xiaolong Wang" - authors: "Ross Girshick" - authors: "Abhinav Gupta" - authors: "Kaiming He" - repositories: { - url: "https://github.com/open-mmlab/mmaction2" - framework: FRAMEWORK_PYTORCH - number_of_stars: 938 - description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" - } - repositories: { - url: "https://github.com/jordiae/DeepLearning-MAI" - framework: FRAMEWORK_PYTORCH - description: "Code for the Deep Learning course (Master in Artificial Intelligence at UPC)" - } - repositories: { - url: "https://github.com/LRacoci/permutation-graphml" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/rijuldhir/TSM" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/JiaPeng1234/MRI-Segmentation-Transformer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 - } - repositories: { - is_official: true - url: "https://github.com/facebookresearch/video-nonlocal-net" - framework: FRAMEWORK_OTHERS - number_of_stars: 1787 - description: "Non-local Neural Networks for Video Classification" - } - repositories: { - url: "https://github.com/jiajunhua/facebookresearch-Detectron" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/huyz1117/Non_Local_Net_TensorFlow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "TensorFlow implementation of Non-local Neural Network " - } - repositories: { - url: "https://github.com/facebookresearch/detectron" - framework: FRAMEWORK_PYTORCH - number_of_stars: 24456 - description: "FAIR's research platform for object detection research, implementing popular algorithms like Mask R-CNN and RetinaNet." - } - repositories: { - url: "https://github.com/seominseok0429/inception-I3D-NON-LOCAL" - framework: FRAMEWORK_PYTORCH - number_of_stars: 13 - description: "Inception-I3D, Non Local finetune, hmdb51_flow" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. 
To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "Non-Local Operation" - full_name: "Non-Local Operation" - description: "A **Non-Local Operation** is a component for capturing long-range dependencies with deep neural networks. It is a generalization of the classical non-local mean operation in computer vision. Intuitively a non-local operation computes the response at a position as a weighted sum of the features at all positions in the input feature maps. The set of positions can be in space, time, or spacetime, implying that these operations are applicable for image, sequence, and video problems.\r\n\r\nFollowing the non-local mean operation, a generic non-local operation for deep neural networks is defined as:\r\n\r\n$$ \\mathbb{y}\\_{i} = \\frac{1}{\\mathcal{C}\\left(\\mathbb{x}\\right)}\\sum\\_{\\forall{j}}f\\left(\\mathbb{x}\\_{i}, \\mathbb{x}\\_{j}\\right)g\\left(\\mathbb{x}\\_{j}\\right) $$\r\n\r\nHere $i$ is the index of an output position (in space, time, or spacetime) whose response is to be computed and $j$ is the index that enumerates all possible positions. x is the input signal (image, sequence, video; often their features) and $y$ is the output signal of the same size as $x$. A pairwise function $f$ computes a scalar (representing relationship such as affinity) between $i$ and all $j$. The unary function $g$ computes a representation of the input signal at the position $j$. The\r\nresponse is normalized by a factor $C\\left(x\\right)$.\r\n\r\nThe non-local behavior is due to the fact that all positions ($\\forall{j}$) are considered in the operation. As a comparison, a convolutional operation sums up the weighted input in a local neighborhood (e.g., $i − 1 \\leq j \\leq i + 1$ in a 1D case with kernel size 3), and a recurrent operation at time $i$ is often based only on the current and the latest time steps (e.g., $j = i$ or $i − 1$).\r\n\r\nThe non-local operation is also different from a fully-connected (fc) layer. The equation above computes responses based on relationships between different locations, whereas fc uses learned weights. In other words, the relationship between $x\\_{j}$ and $x\\_{i}$ is not a function of the input data in fc, unlike in nonlocal layers. Furthermore, the formulation in the equation above supports inputs of variable sizes, and maintains the corresponding size in the output. 
On the contrary, an fc layer requires a fixed-size input/output and loses positional correspondence (e.g., that from $x\\_{i}$ to $y\\_{i}$ at the position $i$).\r\n\r\nA non-local operation is a flexible building block and can be easily used together with convolutional/recurrent layers. It can be added into the earlier part of deep neural networks, unlike fc layers that are often used in the end. This allows us to build a richer hierarchy that combines both non-local and local information.\r\n\r\nIn terms of parameterisation, we usually parameterise $g$ as a linear embedding of the form $g\\left(x\\_{j}\\right) = W\\_{g}\\mathbb{x}\\_{j}$ , where $W\\_{g}$ is a weight matrix to be learned. This is implemented as, e.g., 1×1 convolution in space or 1×1×1 convolution in spacetime. For $f$ we use an affinity function, a list of which can be found [here](https://paperswithcode.com/methods/category/affinity-functions)." - } - methods: { - name: "Embedded Dot Product Affinity" - full_name: "Embedded Dot Product Affinity" - description: "**Embedded Dot Product Affinity** is a type of affinity or self-similarity function between two points $\\mathbb{x\\_{i}}$ and $\\mathbb{x\\_{j}}$ that uses a dot product function in an embedding space:\r\n\r\n$$ f\\left(\\mathbb{x\\_{i}}, \\mathbb{x\\_{j}}\\right) = \\theta\\left(\\mathbb{x\\_{i}}\\right)^{T}\\phi\\left(\\mathbb{x\\_{j}}\\right) $$\r\n\r\nHere $\\theta\\left(x\\_{i}\\right) = W\\_{θ}x\\_{i}$ and $\\phi\\left(x\\_{j}\\right) = W\\_{φ}x\\_{j}$ are two embeddings.\r\n\r\nThe main difference between the dot product and embedded Gaussian affinity functions is the presence of softmax, which plays the role of an activation function." - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ResNeXt" - full_name: "ResNeXt" - description: "A **ResNeXt** repeats a building block that aggregates a set of transformations with the same topology. 
Compared to a [ResNet](https://paperswithcode.com/method/resnet), it exposes a new dimension, *cardinality* (the size of the set of transformations) $C$, as an essential factor in addition to the dimensions of depth and width. \r\n\r\nFormally, a set of aggregated transformations can be represented as: $\\mathcal{F}(x)=\\sum_{i=1}^{C}\\mathcal{T}_i(x)$, where $\\mathcal{T}_i(x)$ can be an arbitrary function. Analogous to a simple neuron, $\\mathcal{T}_i$ should project $x$ into an (optionally low-dimensional) embedding and then transform it." - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." - } - } - video: { - video_id: "ZM153wo3baA" - video_title: "PR-083: Non-local Neural Networks" - number_of_likes: 45 - number_of_views: 4900 - published_date: { - seconds: 1525008094 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 84 - value: { - papers: { - paper_id: "megdet-a-large-mini-batch-object-detector" - title: "MegDet: A Large Mini-Batch Object Detector" - arxiv_id: "1711.07240" - abstract: "The improvements in recent CNN-based object detection works, from R-CNN [11],\nFast/Faster R-CNN [10, 31] to recent Mask R-CNN [14] and RetinaNet [24], mainly\ncome from new network, new framework, or novel loss design. But mini-batch\nsize, a key factor in the training, has not been well studied. In this paper,\nwe propose a Large MiniBatch Object Detector (MegDet) to enable the training\nwith much larger mini-batch size than before (e.g. from 16 to 256), so that we\ncan effectively utilize multiple GPUs (up to 128 in our experiments) to\nsignificantly shorten the training time. Technically, we suggest a learning\nrate policy and Cross-GPU Batch Normalization, which together allow us to\nsuccessfully train a large mini-batch detector in much less time (e.g., from 33\nhours to 4 hours), and achieve even better accuracy. The MegDet is the backbone\nof our submission (mmAP 52.5%) to COCO 2017 Challenge, where we won the 1st\nplace of Detection task." 
- pub_date: { - seconds: 1511136000 - } - authors: "Chao Peng" - authors: "Tete Xiao" - authors: "Zeming Li" - authors: "Yuning Jiang" - authors: "Xiangyu Zhang" - authors: "Kai Jia" - authors: "Gang Yu" - authors: "Jian Sun" - repositories: { - url: "https://github.com/CSAILVision/semantic-segmentation-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3932 - description: "Pytorch implementation for Semantic Segmentation/Scene Parsing on MIT ADE20K dataset" - } - repositories: { - url: "https://github.com/keyEpoch/semen_seg-kd" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/Louis24/Segmentation" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/chenyilun95/tf-cpn" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 759 - description: "Cascaded Pyramid Network for Multi-Person Pose Estimation (CVPR 2018)" - } - repositories: { - url: "https://github.com/vacancy/Synchronized-BatchNorm-PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1301 - description: "Synchronized Batch Normalization implementation in PyTorch." - } - repositories: { - url: "https://github.com/chrisway613/Synchronized-BatchNormalization" - framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - description: "Multi-Gpus Synchronized Batch Normalization implementation in PyTorch" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "RetinaNet" - full_name: "RetinaNet" - description: "**RetinaNet** is a one-stage object detection model that utilizes a focal loss function to address class imbalance during training. Focal loss applies a modulating term to the cross entropy loss in order to focus learning on hard negative examples. RetinaNet is a single, unified network composed of a *backbone* network and two task-specific *subnetworks*. The backbone is responsible for computing a convolutional feature map over an entire input image and is an off-the-self convolutional network. The first subnet performs convolutional object classification on the backbone's output; the second subnet performs convolutional bounding box regression. The two subnetworks feature a simple design that the authors propose specifically for one-stage, dense detection. \r\n\r\nWe can see the motivation for focal loss by comparing with two-stage object detectors. Here class imbalance is addressed by a two-stage cascade and sampling heuristics. The proposal stage (e.g., [Selective Search](https://paperswithcode.com/method/selective-search), [EdgeBoxes](https://paperswithcode.com/method/edgeboxes), [DeepMask](https://paperswithcode.com/method/deepmask), [RPN](https://paperswithcode.com/method/rpn)) rapidly narrows down the number of candidate object locations to a small number (e.g., 1-2k), filtering out most background samples. 
In the second classification stage, sampling heuristics, such as a fixed foreground-to-background ratio, or online hard example mining ([OHEM](https://paperswithcode.com/method/ohem)), are performed to maintain a\r\nmanageable balance between foreground and background.\r\n\r\nIn contrast, a one-stage detector must process a much larger set of candidate object locations regularly sampled across an image. To tackle this, RetinaNet uses a focal loss function, a dynamically scaled cross entropy loss, where the scaling factor decays to zero as confidence in the correct class increases. Intuitively, this scaling factor can automatically down-weight the contribution of easy examples during training and rapidly focus the model on hard examples. \r\n\r\nFormally, the Focal Loss adds a factor $(1 - p\\_{t})^\\gamma$ to the standard cross entropy criterion. Setting $\\gamma>0$ reduces the relative loss for well-classified examples ($p\\_{t}>.5$), putting more focus on hard, misclassified examples. Here there is tunable *focusing* parameter $\\gamma \\ge 0$. \r\n\r\n$$ {\\text{FL}(p\\_{t}) = - (1 - p\\_{t})^\\gamma \\log\\left(p\\_{t}\\right)} $$" - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "Mask R-CNN" - full_name: "Mask R-CNN" - description: "**Mask R-CNN** extends [Faster R-CNN](http://paperswithcode.com/method/faster-r-cnn) to solve instance segmentation tasks. It achieves this by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. In principle, Mask R-CNN is an intuitive extension of Faster R-CNN, but constructing the mask branch properly is critical for good results. \r\n\r\nMost importantly, Faster R-CNN was not designed for pixel-to-pixel alignment between network inputs and outputs. This is evident in how [RoIPool](http://paperswithcode.com/method/roi-pooling), the *de facto* core operation for attending to instances, performs coarse spatial quantization for feature extraction. To fix the misalignment, Mask R-CNN utilises a simple, quantization-free layer, called [RoIAlign](http://paperswithcode.com/method/roi-align), that faithfully preserves exact spatial locations. 
\r\n\r\nSecondly, Mask R-CNN *decouples* mask and class prediction: it predicts a binary mask for each class independently, without competition among classes, and relies on the network's RoI classification branch to predict the category. In contrast, an [FCN](http://paperswithcode.com/method/fcn) usually performs per-pixel multi-class categorization, which couples segmentation and classification." - } - methods: { - name: "FPN" - full_name: "Feature Pyramid Network" - description: "A **Feature Pyramid Network**, or **FPN**, is a feature extractor that takes a single-scale image of an arbitrary size as input, and outputs proportionally sized feature maps at multiple levels, in a fully convolutional fashion. This process is independent of the backbone convolutional architectures. It therefore acts as a generic solution for building feature pyramids inside deep convolutional networks to be used in tasks like object detection.\r\n\r\nThe construction of the pyramid involves a bottom-up pathway and a top-down pathway.\r\n\r\nThe bottom-up pathway is the feedforward computation of the backbone ConvNet, which computes a feature hierarchy consisting of feature maps at several scales with a scaling step of 2. For the feature\r\npyramid, one pyramid level is defined for each stage. The output of the last layer of each stage is used as a reference set of feature maps. For [ResNets](https://paperswithcode.com/method/resnet) we use the feature activations output by each stage’s last residual block. \r\n\r\nThe top-down pathway hallucinates higher resolution features by upsampling spatially coarser, but semantically stronger, feature maps from higher pyramid levels. These features are then enhanced with features from the bottom-up pathway via lateral connections. Each lateral connection merges feature maps of the same spatial size from the bottom-up pathway and the top-down pathway. The bottom-up feature map is of lower-level semantics, but its activations are more accurately localized as it was subsampled fewer times." - } - methods: { - name: "RoIAlign" - full_name: "RoIAlign" - description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. 
It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Focal Loss" - full_name: "Focal Loss" - description: "A **Focal Loss** function addresses class imbalance during training in tasks like object detection. Focal loss applies a modulating term to the cross entropy loss in order to focus learning on hard negative examples. It is a dynamically scaled cross entropy loss, where the scaling factor decays to zero as confidence in the correct class increases. Intuitively, this scaling factor can automatically down-weight the contribution of easy examples during training and rapidly focus the model on hard examples. \r\n\r\nFormally, the Focal Loss adds a factor $(1 - p\\_{t})^\\gamma$ to the standard cross entropy criterion. Setting $\\gamma>0$ reduces the relative loss for well-classified examples ($p\\_{t}>.5$), putting more focus on hard, misclassified examples. Here there is tunable *focusing* parameter $\\gamma \\ge 0$. \r\n\r\n$$ {\\text{FL}(p\\_{t}) = - (1 - p\\_{t})^\\gamma \\log\\left(p\\_{t}\\right)} $$" - } - } - video: { - video_id: "nkYFEoKQBH0" - video_title: "PR-084 MegDet: A Large Mini-Batch Object Detector (CVPR2018)" - number_of_likes: 3 - number_of_views: 1222 - published_date: { - seconds: 1525663256 - } - uploader: "Taegyun Jeon" - } - } -} -pr_id_to_video: { - key: 85 - value: { - papers: { - paper_id: "in-datacenter-performance-analysis-of-a" - title: "In-Datacenter Performance Analysis of a Tensor Processing Unit" - arxiv_id: "1704.04760" - abstract: "Many architects believe that major improvements in cost-energy-performance\nmust now come from domain-specific hardware. This paper evaluates a custom\nASIC---called a Tensor Processing Unit (TPU)---deployed in datacenters since\n2015 that accelerates the inference phase of neural networks (NN). The heart of\nthe TPU is a 65,536 8-bit MAC matrix multiply unit that offers a peak\nthroughput of 92 TeraOps/second (TOPS) and a large (28 MiB) software-managed\non-chip memory. The TPU's deterministic execution model is a better match to\nthe 99th-percentile response-time requirement of our NN applications than are\nthe time-varying optimizations of CPUs and GPUs (caches, out-of-order\nexecution, multithreading, multiprocessing, prefetching, ...) that help average\nthroughput more than guaranteed latency. The lack of such features helps\nexplain why, despite having myriad MACs and a big memory, the TPU is relatively\nsmall and low power. We compare the TPU to a server-class Intel Haswell CPU and\nan Nvidia K80 GPU, which are contemporaries deployed in the same datacenters.\nOur workload, written in the high-level TensorFlow framework, uses production\nNN applications (MLPs, CNNs, and LSTMs) that represent 95% of our datacenters'\nNN inference demand. Despite low utilization for some applications, the TPU is\non average about 15X - 30X faster than its contemporary GPU or CPU, with\nTOPS/Watt about 30X - 80X higher. Moreover, using the GPU's GDDR5 memory in the\nTPU would triple achieved TOPS and raise TOPS/Watt to nearly 70X the GPU and\n200X the CPU." - pub_date: { - seconds: 1492300800 - } - authors: "Norman P. 
Jouppi" - authors: "Cliff Young" - authors: "Nishant Patil" - authors: "David Patterson" - authors: "Gaurav Agrawal" - authors: "Raminder Bajwa" - authors: "Sarah Bates" - authors: "Suresh Bhatia" - authors: "Nan Boden" - authors: "Al Borchers" - authors: "Rick Boyle" - authors: "Pierre-luc Cantin" - authors: "Clifford Chao" - authors: "Chris Clark" - authors: "Jeremy Coriell" - authors: "Mike Daley" - authors: "Matt Dau" - authors: "Jeffrey Dean" - authors: "Ben Gelb" - authors: "Tara Vazir Ghaemmaghami" - authors: "Rajendra Gottipati" - authors: "William Gulland" - authors: "Robert Hagmann" - authors: "C. Richard Ho" - authors: "Doug Hogberg" - authors: "John Hu" - authors: "Robert Hundt" - authors: "Dan Hurt" - authors: "Julian Ibarz" - authors: "Aaron Jaffey" - authors: "Alek Jaworski" - authors: "Alexander Kaplan" - authors: "Harshit Khaitan" - authors: "Andy Koch" - authors: "Naveen Kumar" - authors: "Steve Lacy" - authors: "James Laudon" - authors: "James Law" - authors: "Diemthu Le" - authors: "Chris Leary" - authors: "Zhuyuan Liu" - authors: "Kyle Lucke" - authors: "Alan Lundin" - authors: "Gordon MacKean" - authors: "Adriana Maggiore" - authors: "Maire Mahony" - authors: "Kieran Miller" - authors: "Rahul Nagarajan" - authors: "Ravi Narayanaswami" - authors: "Ray Ni" - authors: "Kathy Nix" - authors: "Thomas Norrie" - authors: "Mark Omernick" - authors: "Narayana Penukonda" - authors: "Andy Phelps" - authors: "Jonathan Ross" - authors: "Matt Ross" - authors: "Amir Salek" - authors: "Emad Samadiani" - authors: "Chris Severn" - authors: "Gregory Sizikov" - authors: "Matthew Snelham" - authors: "Jed Souter" - authors: "Dan Steinberg" - authors: "Andy Swing" - authors: "Mercedes Tan" - authors: "Gregory Thorson" - authors: "Bo Tian" - authors: "Horia Toma" - authors: "Erick Tuttle" - authors: "Vijay Vasudevan" - authors: "Richard Walter" - authors: "Walter Wang" - authors: "Eric Wilcox" - authors: "Doe Hyun Yoon" - } - video: { - video_id: "7WhWkhFAIO4" - video_title: "PR-085: In-Datacenter Performance Analysis of a Tensor Processing Unit" - number_of_likes: 21 - number_of_views: 1585 - published_date: { - seconds: 1526140508 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 86 - value: { - papers: { - paper_id: "on-the-power-of-curriculum-learning-in" - title: "On The Power of Curriculum Learning in Training Deep Networks" - arxiv_id: "1904.03626" - abstract: "Training neural networks is traditionally done by providing a sequence of random mini-batches sampled uniformly from the entire training data. In this work, we analyze the effect of curriculum learning, which involves the non-uniform sampling of mini-batches, on the training of deep networks, and specifically CNNs trained for image recognition. To employ curriculum learning, the training algorithm must resolve 2 problems: (i) sort the training examples by difficulty; (ii) compute a series of mini-batches that exhibit an increasing level of difficulty. We address challenge (i) using two methods: transfer learning from some competitive ``teacher\" network, and bootstrapping. In our empirical evaluation, both methods show similar benefits in terms of increased learning speed and improved final performance on test data. We address challenge (ii) by investigating different pacing functions to guide the sampling. The empirical investigation includes a variety of network architectures, using images from CIFAR-10, CIFAR-100 and subsets of ImageNet. 
We conclude with a novel theoretical analysis of curriculum learning, where we show how it effectively modifies the optimization landscape. We then define the concept of an ideal curriculum, and show that under mild conditions it does not change the corresponding global minimum of the optimization function." - pub_date: { - seconds: 1554595200 - } - authors: "Guy Hacohen" - authors: "Daphna Weinshall" - repositories: { - url: "https://github.com/josephch405/curriculum-nmt" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - } - repositories: { - is_official: true - url: "https://github.com/GuyHacohen/curriculum_learning" - framework: FRAMEWORK_OTHERS - number_of_stars: 46 - description: "Code implementing the experiments described in the paper \"On The Power of Curriculum Learning in Training Deep Networks\" by Hacohen & Weinshall (ICML 2019)" - } - } - video: { - video_id: "fQtuWEuwXrA" - video_title: "PR-086: Curriculum Learning" - number_of_likes: 5 - number_of_views: 1367 - published_date: { - seconds: 1526221428 - } - uploader: "차준범" - } - } -} -pr_id_to_video: { - key: 87 - value: { - papers: { - paper_id: "spectral-normalization-for-generative" - title: "Spectral Normalization for Generative Adversarial Networks" - arxiv_id: "1802.05957" - abstract: "One of the challenges in the study of generative adversarial networks is the\ninstability of its training. In this paper, we propose a novel weight\nnormalization technique called spectral normalization to stabilize the training\nof the discriminator. Our new normalization technique is computationally light\nand easy to incorporate into existing implementations. We tested the efficacy\nof spectral normalization on CIFAR10, STL-10, and ILSVRC2012 dataset, and we\nexperimentally confirmed that spectrally normalized GANs (SN-GANs) is capable\nof generating images of better or equal quality relative to the previous\ntraining stabilization techniques." - pub_date: { - seconds: 1518739200 - } - authors: "Takeru Miyato" - authors: "Toshiki Kataoka" - authors: "Masanori Koyama" - authors: "Yuichi Yoshida" - repositories: { - url: "https://github.com/karoly-hars/GAN_image_colorizing" - framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - description: "Image colorization with generative adversarial networks on the CIFAR10 dataset." 
- } - repositories: { - url: "https://github.com/ncuzzy/mygan" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/IShengFang/SpectralNormalizationKeras" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 105 - description: "Spectral Normalization for Keras Dense and Convolution Layers" - } - repositories: { - url: "https://github.com/zhusiling/SAGAN" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/qiaolinhan/ws-preprocess" - framework: FRAMEWORK_OTHERS - description: "This is image restoration for UAV based wildfire segmentation because it will always meet some disturbance, noise or other serious situation " - } - repositories: { - is_official: true - url: "https://github.com/pfnet-research/sngan_projection" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 950 - description: "GANs with spectral normalization and projection discriminator" - } - repositories: { - url: "https://github.com/kklemon/bgan-pytorch" - framework: FRAMEWORK_PYTORCH - description: "PyTorch implementation of Boundary Seeking GAN for discrete data" - } - repositories: { - url: "https://github.com/guy-oren/DIRT-OST" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/Bingwen-Hu/DRIT" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/hinofafa/Self-Attention-HearthStone-GAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "This repository provides a PyTorch implementation of SAGAN cited by heykeetae/Self-Attention-GAN. This repository provide an efficient method to generate large resolution images and attention weights visualisation using tensorboard platform. Tensorboard is a robust platform to monitor generated images and learning weights in computer vision learning experiment." - } - methods: { - name: "GAN Hinge Loss" - full_name: "GAN Hinge Loss" - description: "The **GAN Hinge Loss** is a hinge loss based loss function for [generative adversarial networks](https://paperswithcode.com/methods/category/generative-adversarial-networks):\r\n\r\n$$ L\\_{D} = -\\mathbb{E}\\_{\\left(x, y\\right)\\sim{p}\\_{data}}\\left[\\min\\left(0, -1 + D\\left(x, y\\right)\\right)\\right] -\\mathbb{E}\\_{z\\sim{p\\_{z}}, y\\sim{p\\_{data}}}\\left[\\min\\left(0, -1 - D\\left(G\\left(z\\right), y\\right)\\right)\\right] $$\r\n\r\n$$ L\\_{G} = -\\mathbb{E}\\_{z\\sim{p\\_{z}}, y\\sim{p\\_{data}}}D\\left(G\\left(z\\right), y\\right) $$" - } - methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. 
Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." - } - methods: { - name: "Spectral Normalization" - full_name: "Spectral Normalization" - description: "**Spectral Normalization** is a normalization technique used for generative adversarial networks, used to stabilize training of the discriminator. Spectral normalization has the convenient property that the Lipschitz constant is the only hyper-parameter to be tuned.\r\n\r\nIt controls the Lipschitz constant of the discriminator $f$ by constraining the spectral norm of each layer $g : \\textbf{h}\\_{in} \\rightarrow \\textbf{h}_{out}$. The Lipschitz norm $\\Vert{g}\\Vert\\_{\\text{Lip}}$ is equal to $\\sup\\_{\\textbf{h}}\\sigma\\left(\\nabla{g}\\left(\\textbf{h}\\right)\\right)$, where $\\sigma\\left(a\\right)$ is the spectral norm of the matrix $A$ ($L\\_{2}$ matrix norm of $A$):\r\n\r\n$$ \\sigma\\left(a\\right) = \\max\\_{\\textbf{h}:\\textbf{h}\\neq{0}}\\frac{\\Vert{A\\textbf{h}}\\Vert\\_{2}}{\\Vert\\textbf{h}\\Vert\\_{2}} = \\max\\_{\\Vert\\textbf{h}\\Vert\\_{2}\\leq{1}}{\\Vert{A\\textbf{h}}\\Vert\\_{2}} $$\r\n\r\nwhich is equivalent to the largest singular value of $A$. Therefore for a linear layer $g\\left(\\textbf{h}\\right) = W\\textbf{h}$ the norm is given by $\\Vert{g}\\Vert\\_{\\text{Lip}} = \\sup\\_{\\textbf{h}}\\sigma\\left(\\nabla{g}\\left(\\textbf{h}\\right)\\right) = \\sup\\_{\\textbf{h}}\\sigma\\left(W\\right) = \\sigma\\left(W\\right) $. Spectral normalization normalizes the spectral norm of the weight matrix $W$ so it satisfies the Lipschitz constraint $\\sigma\\left(W\\right) = 1$:\r\n\r\n$$ \\bar{W}\\_{\\text{SN}}\\left(W\\right) = W / \\sigma\\left(W\\right) $$" - } - methods: { - name: "SNGAN" - full_name: "Spectrally Normalised GAN" - description: "**SNGAN**, or **Spectrally Normalised GAN**, is a type of generative adversarial network that uses spectral normalization, a type of weight normalization, to stabilise the training of the discriminator." - } - methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." - } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. 
Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "Leaky ReLU" - full_name: "Leaky ReLU" - description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we may suffer from sparse gradients, for example training generative adversarial networks." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input."
- } - } - video: { - video_id: "iXSYqohGQhM" - video_title: "PR-087: Spectral Normalization for Generative Adversarial Networks" - number_of_likes: 44 - number_of_views: 4660 - published_date: { - seconds: 1526221916 - } - uploader: "Jaejun Yoo" - } - } -} -pr_id_to_video: { - key: 88 - value: { - papers: { - paper_id: "deep-variational-bayes-filters-unsupervised" - title: "Deep Variational Bayes Filters: Unsupervised Learning of State Space Models from Raw Data" - arxiv_id: "1605.06432" - abstract: "We introduce Deep Variational Bayes Filters (DVBF), a new method for\nunsupervised learning and identification of latent Markovian state space\nmodels. Leveraging recent advances in Stochastic Gradient Variational Bayes,\nDVBF can overcome intractable inference distributions via variational\ninference. Thus, it can handle highly nonlinear input data with temporal and\nspatial dependencies such as image sequences without domain knowledge. Our\nexperiments show that enabling backpropagation through transitions enforces\nstate space assumptions and significantly improves information content of the\nlatent embedding. This also enables realistic long-term prediction." - pub_date: { - seconds: 1463702400 - } - authors: "Maximilian Karl" - authors: "Maximilian Soelch" - authors: "Justin Bayer" - authors: "Patrick van der Smagt" - repositories: { - url: "https://github.com/baggepinnen/DeepFilters.jl" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Fiiiiiiiiiiiiiilters" - } - repositories: { - is_official: true - url: "https://github.com/baggepinnen/DVBF.jl" - framework: FRAMEWORK_OTHERS - number_of_stars: 8 - description: "Deep variational Bayes filter in julia using Flux" - } - methods: { - name: "Stochastic Gradient Variational Bayes" - full_name: "Stochastic Gradient Variational Bayes" - } - } - video: { - video_id: "uM0rQtL6_AA" - video_title: "PR-088: Deep Variational Bayes Filters (2017)" - number_of_likes: 37 - number_of_views: 2792 - published_date: { - seconds: 1526901682 - } - uploader: "Terry TaeWoong Um" - } - } -} -pr_id_to_video: { - key: 89 - value: { - papers: { - paper_id: "beyond-word-importance-contextual" - title: "Beyond Word Importance: Contextual Decomposition to Extract Interactions from LSTMs" - arxiv_id: "1801.05453" - abstract: "The driving force behind the recent success of LSTMs has been their ability\nto learn complex and non-linear relationships. Consequently, our inability to\ndescribe these relationships has led to LSTMs being characterized as black\nboxes. To this end, we introduce contextual decomposition (CD), an\ninterpretation algorithm for analysing individual predictions made by standard\nLSTMs, without any changes to the underlying model. By decomposing the output\nof a LSTM, CD captures the contributions of combinations of words or variables\nto the final prediction of an LSTM. On the task of sentiment analysis with the\nYelp and SST data sets, we show that CD is able to reliably identify words and\nphrases of contrasting sentiment, and how they are combined to yield the LSTM's\nfinal prediction. Using the phrase-level labels in SST, we also demonstrate\nthat CD is able to successfully extract positive and negative negations from an\nLSTM, something which has not previously been done." - pub_date: { - seconds: 1516060800 - } - authors: "W. James Murdoch" - authors: "Peter J. 
Liu" - authors: "Bin Yu" - repositories: { - is_official: true - url: "https://github.com/jamie-murdoch/ContextualDecomposition" - framework: FRAMEWORK_PYTORCH - number_of_stars: 53 - description: "Demo for method introduced in \"Beyond Word Importance: Contextual Decomposition to Extract Interactions from LSTMs\"" - } - repositories: { - url: "https://github.com/csinva/hierarchical-dnn-interpretations" - framework: FRAMEWORK_PYTORCH - number_of_stars: 92 - description: "Using / reproducing ACD from the paper \"Hierarchical interpretations for neural network predictions\" 🧠 (ICLR 2019)" - } - repositories: { - url: "https://github.com/suyash/ContextualDecomposition" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Contextual Decomposition Experiments" - } - methods: { - name: "Tanh Activation" - full_name: "Tanh Activation" - description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" - } - methods: { - name: "LSTM" - full_name: "Long Short-Term Memory" - description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" - } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - } - video: { - video_id: "5whGIpoLoq4" - video_title: "PR-089: Beyond Word Importance: Contextual Decomposition to Extract Interactions from LSTMs" - number_of_views: 440 - published_date: { - seconds: 1528641922 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 90 - value: { - papers: { - paper_id: "representation-learning-by-learning-to-count" - title: "Representation Learning by Learning to Count" - arxiv_id: "1708.06734" - abstract: "We introduce a novel method for representation learning that uses an\nartificial supervision signal based on counting visual primitives. This\nsupervision signal is obtained from an equivariance relation, which does not\nrequire any manual annotation. We relate transformations of images to\ntransformations of the representations. 
More specifically, we look for the\nrepresentation that satisfies such relation rather than the transformations\nthat match a given representation. In this paper, we use two image\ntransformations in the context of counting: scaling and tiling. The first\ntransformation exploits the fact that the number of visual primitives should be\ninvariant to scale. The second transformation allows us to equate the total\nnumber of visual primitives in each tile to that in the whole image. These two\ntransformations are combined in one constraint and used to train a neural\nnetwork with a contrastive loss. The proposed task produces representations\nthat perform on par or exceed the state of the art in transfer learning\nbenchmarks." - pub_date: { - seconds: 1503360000 - } - authors: "Mehdi Noroozi" - authors: "Hamed Pirsiavash" - authors: "Paolo Favaro" - repositories: { - url: "https://github.com/gitlimlab/Representation-Learning-by-Learning-to-Count" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 107 - description: "A Tensorflow implementation of Representation Learning by Learning to Count" - } - } - video: { - video_id: "T7i_YKN2EY8" - video_title: "PR-090: Representation Learning by Learning to Count" - number_of_likes: 3 - number_of_views: 426 - published_date: { - seconds: 1529233262 - } - uploader: "Suh Kiho" - } - } -} -pr_id_to_video: { - key: 91 - value: { - video: { - video_id: "v1GbxpKqH8Q" - video_title: "PR-091: A Universal Music Translation Network" - number_of_likes: 12 - number_of_views: 732 - published_date: { - seconds: 1529241765 - } - uploader: "Seungil Kim" - } - } -} -pr_id_to_video: { - key: 92 - value: { - papers: { - paper_id: "a-hitchhikers-guide-on-distributed-training" - title: "A Hitchhiker's Guide On Distributed Training of Deep Neural Networks" - arxiv_id: "1810.11787" - abstract: "Deep learning has led to tremendous advancements in the field of Artificial\nIntelligence. One caveat however is the substantial amount of compute needed to\ntrain these deep learning models. Training a benchmark dataset like ImageNet on\na single machine with a modern GPU can take upto a week, distributing training\non multiple machines has been observed to drastically bring this time down.\nRecent work has brought down ImageNet training time to a time as low as 4\nminutes by using a cluster of 2048 GPUs. This paper surveys the various\nalgorithms and techniques used to distribute training and presents the current\nstate of the art for a modern distributed training framework. More\nspecifically, we explore the synchronous and asynchronous variants of\ndistributed Stochastic Gradient Descent, various All Reduce gradient\naggregation strategies and best practices for obtaining higher throughout and\nlower latency over a cluster such as mixed precision training, large batch\ntraining and gradient compression." 
- pub_date: { - seconds: 1540684800 - } - authors: "Karanbir Chahal" - authors: "Manraj Singh Grover" - authors: "Kuntal Dey" - } - video: { - video_id: "pAH3KhVnADE" - video_title: "PR-092: Distributed Training of Neural Networks" - number_of_likes: 4 - number_of_views: 745 - published_date: { - seconds: 1529243628 - } - uploader: "Jiyang Kang" - } - } -} -pr_id_to_video: { - key: 93 - value: { - papers: { - paper_id: "playing-hard-exploration-games-by-watching" - title: "Playing hard exploration games by watching YouTube" - arxiv_id: "1805.11592" - abstract: "Deep reinforcement learning methods traditionally struggle with tasks where\nenvironment rewards are particularly sparse. One successful method of guiding\nexploration in these domains is to imitate trajectories provided by a human\ndemonstrator. However, these demonstrations are typically collected under\nartificial conditions, i.e. with access to the agent's exact environment setup\nand the demonstrator's action and reward trajectories. Here we propose a\ntwo-stage method that overcomes these limitations by relying on noisy,\nunaligned footage without access to such data. First, we learn to map unaligned\nvideos from multiple sources to a common representation using self-supervised\nobjectives constructed over both time and modality (i.e. vision and sound).\nSecond, we embed a single YouTube video in this representation to construct a\nreward function that encourages an agent to imitate human gameplay. This method\nof one-shot imitation allows our agent to convincingly exceed human-level\nperformance on the infamously hard exploration games Montezuma's Revenge,\nPitfall! and Private Eye for the first time, even if the agent is not presented\nwith any environment rewards." - pub_date: { - seconds: 1527552000 - } - authors: "Yusuf Aytar" - authors: "Tobias Pfaff" - authors: "David Budden" - authors: "Tom Le Paine" - authors: "Ziyu Wang" - authors: "Nando de Freitas" - repositories: { - url: "https://github.com/MaxSobolMark/HardRLWithYoutube" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 30 - description: "TensorFlow implementation of \"Playing hard exploration games by watching YouTube\"" - } - } - video: {} - } -} -pr_id_to_video: { - key: 94 - value: { - papers: { - paper_id: "model-agnostic-meta-learning-for-fast" - title: "Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks" - arxiv_id: "1703.03400" - abstract: "We propose an algorithm for meta-learning that is model-agnostic, in the\nsense that it is compatible with any model trained with gradient descent and\napplicable to a variety of different learning problems, including\nclassification, regression, and reinforcement learning. The goal of\nmeta-learning is to train a model on a variety of learning tasks, such that it\ncan solve new learning tasks using only a small number of training samples. In\nour approach, the parameters of the model are explicitly trained such that a\nsmall number of gradient steps with a small amount of training data from a new\ntask will produce good generalization performance on that task. In effect, our\nmethod trains the model to be easy to fine-tune. We demonstrate that this\napproach leads to state-of-the-art performance on two few-shot image\nclassification benchmarks, produces good results on few-shot regression, and\naccelerates fine-tuning for policy gradient reinforcement learning with neural\nnetwork policies." 
- pub_date: { - seconds: 1489017600 - } - authors: "Chelsea Finn" - authors: "Pieter Abbeel" - authors: "Sergey Levine" - repositories: { - url: "https://github.com/ThomasGoerttler/similarity-analysis-of-maml" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Code for \"Exploring the Similarity of Representations in Model-Agnostic Meta-Learning\" Forked from the code of the original paper \"Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks\"" - } - repositories: { - url: "https://github.com/mikehuisman/revisiting-learned-optimizers" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/antaradas94/MAML-waste-classification" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/GeorgeDUT/MetaRLSAS" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/SinghJasdeep/Projecting-Conflicting-Gradients" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/Tikquuss/meta_XLM" - framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - description: "Cross-lingual Language Model (XLM) pretraining and Model-Agnostic Meta-Learning (MAML) for fast adaptation of deep networks" - } - repositories: { - url: "https://github.com/Zhiwei-Z/prompzzw" - framework: FRAMEWORK_TENSORFLOW - description: "Experiment sequential meta training using promp" - } - repositories: { - url: "https://github.com/sidney1505/arc_maml_transformer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/laiviet/maml" - framework: FRAMEWORK_PYTORCH - description: "Implementation of Model Agnostic Meta Learning" - } - repositories: { - url: "https://github.com/foolyc/Meta-SGD" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 65 - description: "Meta-SGD experiment on Omniglot classification compared with MAML" - } - methods: { - name: "TRPO" - full_name: "Trust Region Policy Optimization" - description: "**Trust Region Policy Optimization**, or **TRPO**, is a policy gradient method in reinforcement learning that avoids parameter updates that change the policy too much with a KL divergence constraint on the size of the policy update at each iteration.\r\n\r\nTake the case of off-policy reinforcement learning, where the policy $\\beta$ for collecting trajectories on rollout workers is different from the policy $\\pi$ to optimize for. 
The objective function in an off-policy model measures the total advantage over the state visitation distribution and actions, while the mismatch between the training data distribution and the true policy state distribution is compensated with an importance sampling estimator:\r\n\r\n$$ J\\left(\\theta\\right) = \\sum\\_{s\\in{S}}p^{\\pi\\_{\\theta\\_{old}}}\\sum\\_{a\\in\\mathcal{A}}\\left(\\pi\\_{\\theta}\\left(a\\mid{s}\\right)\\hat{A}\\_{\\theta\\_{old}}\\left(s, a\\right)\\right) $$\r\n\r\n$$ J\\left(\\theta\\right) = \\sum\\_{s\\in{S}}p^{\\pi\\_{\\theta\\_{old}}}\\sum\\_{a\\in\\mathcal{A}}\\left(\\beta\\left(a\\mid{s}\\right)\\frac{\\pi\\_{\\theta}\\left(a\\mid{s}\\right)}{\\beta\\left(a\\mid{s}\\right)}\\hat{A}\\_{\\theta\\_{old}}\\left(s, a\\right)\\right) $$\r\n\r\n$$ J\\left(\\theta\\right) = \\mathbb{E}\\_{s\\sim{p}^{\\pi\\_{\\theta\\_{old}}}, a\\sim{\\beta}} \\left(\\frac{\\pi\\_{\\theta}\\left(a\\mid{s}\\right)}{\\beta\\left(a\\mid{s}\\right)}\\hat{A}\\_{\\theta\\_{old}}\\left(s, a\\right)\\right)$$\r\n\r\nWhen training on policy, theoretically the policy for collecting data is same as the policy that we want to optimize. However, when rollout workers and optimizers are running in parallel asynchronously, the behavior policy can get stale. TRPO considers this subtle difference: It labels the behavior policy as $\\pi\\_{\\theta\\_{old}}\\left(a\\mid{s}\\right)$ and thus the objective function becomes:\r\n\r\n$$ J\\left(\\theta\\right) = \\mathbb{E}\\_{s\\sim{p}^{\\pi\\_{\\theta\\_{old}}}, a\\sim{\\pi\\_{\\theta\\_{old}}}} \\left(\\frac{\\pi\\_{\\theta}\\left(a\\mid{s}\\right)}{\\pi\\_{\\theta\\_{old}}\\left(a\\mid{s}\\right)}\\hat{A}\\_{\\theta\\_{old}}\\left(s, a\\right)\\right)$$\r\n\r\nTRPO aims to maximize the objective function $J\\left(\\theta\\right)$ subject to a trust region constraint which enforces the distance between old and new policies measured by KL-divergence to be small enough, within a parameter $\\delta$:\r\n\r\n$$ \\mathbb{E}\\_{s\\sim{p}^{\\pi\\_{\\theta\\_{old}}}} \\left[D\\_{KL}\\left(\\pi\\_{\\theta\\_{old}}\\left(.\\mid{s}\\right)\\mid\\mid\\pi\\_{\\theta}\\left(.\\mid{s}\\right)\\right)\\right] \\leq \\delta$$" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "MAML" - full_name: "Model-Agnostic Meta-Learning" - description: "**MAML**, or **Model-Agnostic Meta-Learning**, is a model and task-agnostic algorithm for meta-learning that trains a model’s parameters such that a small number of gradient updates will lead to fast learning on a new task.\r\n\r\nConsider a model represented by a parametrized function $f\\_{\\theta}$ with parameters $\\theta$. When adapting to a new task $\\mathcal{T}\\_{i}$, the model’s parameters $\\theta$ become $\\theta'\\_{i}$. With MAML, the updated parameter vector $\\theta'\\_{i}$ is computed using one or more gradient descent updates on task $\\mathcal{T}\\_{i}$. For example, when using one gradient update,\r\n\r\n$$ \\theta'\\_{i} = \\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right) $$\r\n\r\nThe step size $\\alpha$ may be fixed as a hyperparameter or metalearned. The model parameters are trained by optimizing for the performance of $f\\_{\\theta'\\_{i}}$ with respect to $\\theta$ across tasks sampled from $p\\left(\\mathcal{T}\\_{i}\\right)$. More concretely the meta-objective is as follows:\r\n\r\n$$ \\min\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right) = \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right)}\\right) $$\r\n\r\nNote that the meta-optimization is performed over the model parameters $\\theta$, whereas the objective is computed using the updated model parameters $\\theta'$. In effect MAML aims to optimize the model parameters such that one or a small number of gradient steps on a new task will produce maximally effective behavior on that task. The meta-optimization across tasks is performed via stochastic gradient descent (SGD), such that the model parameters $\\theta$ are updated as follows:\r\n\r\n$$ \\theta \\leftarrow \\theta - \\beta\\nabla\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right)$$\r\n\r\nwhere $\\beta$ is the meta step size." - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. 
Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "Linear Layer" - full_name: "Linear Layer" - description: "A **Linear Layer** is a projection $\\mathbf{XW + b}$." - } - } - video: { - video_id: "fxJXXKZb-ik" - video_title: "PR-094: Model-Agnostic Meta-Learning for fast adaptation of deep networks" - number_of_likes: 58 - number_of_views: 5061 - published_date: { - seconds: 1529847830 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 95 - value: { - papers: { - paper_id: "modularity-matters-learning-invariant" - title: "Modularity Matters: Learning Invariant Relational Reasoning Tasks" - arxiv_id: "1806.06765" - abstract: "We focus on two supervised visual reasoning tasks whose labels encode a\nsemantic relational rule between two or more objects in an image: the MNIST\nParity task and the colorized Pentomino task. The objects in the images undergo\nrandom translation, scaling, rotation and coloring transformations. Thus these\ntasks involve invariant relational reasoning. We report uneven performance of\nvarious deep CNN models on these two tasks. For the MNIST Parity task, we\nreport that the VGG19 model soundly outperforms a family of ResNet models.\nMoreover, the family of ResNet models exhibits a general sensitivity to random\ninitialization for the MNIST Parity task. For the colorized Pentomino task, now\nboth the VGG19 and ResNet models exhibit sluggish optimization and very poor\ntest generalization, hovering around 30% test error. The CNN we tested all\nlearn hierarchies of fully distributed features and thus encode the distributed\nrepresentation prior. We are motivated by a hypothesis from cognitive\nneuroscience which posits that the human visual cortex is modularized, and this\nallows the visual cortex to learn higher order invariances. To this end, we\nconsider a modularized variant of the ResNet model, referred to as a Residual\nMixture Network (ResMixNet) which employs a mixture-of-experts architecture to\ninterleave distributed representations with more specialized, modular\nrepresentations. We show that very shallow ResMixNets are capable of learning\neach of the two tasks well, attaining less than 2% and 1% test error on the\nMNIST Parity and the colorized Pentomino tasks respectively. Most importantly,\nthe ResMixNet models are extremely parameter efficient: generalizing better\nthan various non-modular CNNs that have over 10x the number of parameters.\nThese experimental results support the hypothesis that modularity is a robust\nprior for learning invariant relational reasoning." 
- pub_date: { - seconds: 1529280000 - } - authors: "Jason Jo" - authors: "Vikas Verma" - authors: "Yoshua Bengio" - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. 
Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." - } - } - video: { - video_id: "dAGI3mlOmfw" - video_title: "PR-095: Modularity Matters: Learning Invariant Relational Reasoning Tasks" - number_of_likes: 9 - number_of_views: 767 - published_date: { - seconds: 1532272031 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 96 - value: { - papers: { - paper_id: "taskonomy-disentangling-task-transfer" - title: "Taskonomy: Disentangling Task Transfer Learning" - arxiv_id: "1804.08328" - abstract: "Do visual tasks have a relationship, or are they unrelated? For instance,\ncould having surface normals simplify estimating the depth of an image?\nIntuition answers these questions positively, implying existence of a structure\namong visual tasks. Knowing this structure has notable values; it is the\nconcept underlying transfer learning and provides a principled way for\nidentifying redundancies across tasks, e.g., to seamlessly reuse supervision\namong related tasks or solve many tasks in one system without piling up the\ncomplexity.\n We proposes a fully computational approach for modeling the structure of\nspace of visual tasks. This is done via finding (first and higher-order)\ntransfer learning dependencies across a dictionary of twenty six 2D, 2.5D, 3D,\nand semantic tasks in a latent space. The product is a computational taxonomic\nmap for task transfer learning. We study the consequences of this structure,\ne.g. 
nontrivial emerged relationships, and exploit them to reduce the demand\nfor labeled data. For example, we show that the total number of labeled\ndatapoints needed for solving a set of 10 tasks can be reduced by roughly 2/3\n(compared to training independently) while keeping the performance nearly the\nsame. We provide a set of tools for computing and probing this taxonomical\nstructure including a solver that users can employ to devise efficient\nsupervision policies for their use cases." - pub_date: { - seconds: 1524441600 - } - authors: "Amir Zamir" - authors: "Alexander Sax" - authors: "William Shen" - authors: "Leonidas Guibas" - authors: "Jitendra Malik" - authors: "Silvio Savarese" - repositories: { - is_official: true - url: "https://github.com/StanfordVL/taskonomy" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 728 - description: "Taskonomy: Disentangling Task Transfer Learning" - } - } - video: { - video_id: "WjUGrzBIDv0" - video_title: "PR-096: Taskonomy: Disentangling Task Transfer Learning" - number_of_likes: 10 - number_of_views: 1179 - published_date: { - seconds: 1530451567 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 97 - value: { - papers: { - paper_id: "learning-representations-for-counterfactual" - title: "Learning Representations for Counterfactual Inference" - arxiv_id: "1605.03661" - abstract: "Observational studies are rising in importance due to the widespread\naccumulation of data in fields such as healthcare, education, employment and\necology. We consider the task of answering counterfactual questions such as,\n\"Would this patient have lower blood sugar had she received a different\nmedication?\". We propose a new algorithmic framework for counterfactual\ninference which brings together ideas from domain adaptation and representation\nlearning. In addition to a theoretical justification, we perform an empirical\ncomparison with previous approaches to causal inference from observational\ndata. Our deep learning algorithm significantly outperforms the previous\nstate-of-the-art." - pub_date: { - seconds: 1463011200 - } - authors: "Fredrik D. Johansson" - authors: "Uri Shalit" - authors: "David Sontag" - repositories: { - url: "https://github.com/lightlightdyy/Deep-Learning-and-Causal-Inference" - framework: FRAMEWORK_OTHERS - number_of_stars: 24 - } - methods: { - name: "Causal Inference" - full_name: "Causal Inference" - description: "Causal inference is the process of drawing a conclusion about a causal connection based on the conditions of the occurrence of an effect. The main difference between causal inference and inference of association is that the former analyzes the response of the effect variable when the cause is changed." - } - } - video: { - video_id: "l-pcG77Hr58" - video_title: "PR-097: Learning Representations for Counterfactual Inference" - number_of_views: 819 - published_date: { - seconds: 1531061236 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 98 - value: { - papers: { - paper_id: "megadepth-learning-single-view-depth" - title: "MegaDepth: Learning Single-View Depth Prediction from Internet Photos" - arxiv_id: "1804.00607" - abstract: "Single-view depth prediction is a fundamental problem in computer vision.\nRecently, deep learning methods have led to significant progress, but such\nmethods are limited by the available training data. 
Current datasets based on\n3D sensors have key limitations, including indoor-only images (NYU), small\nnumbers of training examples (Make3D), and sparse sampling (KITTI). We propose\nto use multi-view Internet photo collections, a virtually unlimited data\nsource, to generate training data via modern structure-from-motion and\nmulti-view stereo (MVS) methods, and present a large depth dataset called\nMegaDepth based on this idea. Data derived from MVS comes with its own\nchallenges, including noise and unreconstructable objects. We address these\nchallenges with new data cleaning methods, as well as automatically augmenting\nour data with ordinal depth relations generated using semantic segmentation. We\nvalidate the use of large amounts of Internet data by showing that models\ntrained on MegaDepth exhibit strong generalization-not only to novel scenes,\nbut also to other diverse datasets including Make3D, KITTI, and DIW, even when\nno images from those datasets are seen during training." - pub_date: { - seconds: 1522627200 - } - authors: "Zhengqi Li" - authors: "Noah Snavely" - repositories: { - url: "https://github.com/zhengqili/MegaDepth" - framework: FRAMEWORK_PYTORCH - number_of_stars: 535 - description: "Code of single-view depth prediction algorithm on Internet Photos described in \"MegaDepth: Learning Single-View Depth Prediction from Internet Photos, Z. Li and N. Snavely, CVPR 2018\"." - } - } - video: { - video_id: "tGbMWAFMMBQ" - video_title: "PR-098: MegaDepth: Learning Single-View Depth Prediction from Internet Photos (CVPR2018)" - number_of_likes: 6 - number_of_views: 776 - published_date: { - seconds: 1531661811 - } - uploader: "이광희" - } - } -} -pr_id_to_video: { - key: 99 - value: { - papers: { - paper_id: "mrnet-product2vec-a-multi-task-recurrent" - title: "MRNet-Product2Vec: A Multi-task Recurrent Neural Network for Product Embeddings" - arxiv_id: "1709.07534" - abstract: "E-commerce websites such as Amazon, Alibaba, Flipkart, and Walmart sell\nbillions of products. Machine learning (ML) algorithms involving products are\noften used to improve the customer experience and increase revenue, e.g.,\nproduct similarity, recommendation, and price estimation. The products are\nrequired to be represented as features before training an ML algorithm. In this\npaper, we propose an approach called MRNet-Product2Vec for creating generic\nembeddings of products within an e-commerce ecosystem. We learn a dense and\nlow-dimensional embedding where a diverse set of signals related to a product\nare explicitly injected into its representation. We train a Discriminative\nMulti-task Bidirectional Recurrent Neural Network (RNN), where the input is a\nproduct title fed through a Bidirectional RNN and at the output, product labels\ncorresponding to fifteen different tasks are predicted. The task set includes\nseveral intrinsic characteristics about a product such as price, weight, size,\ncolor, popularity, and material. We evaluate the proposed embedding\nquantitatively and qualitatively. We demonstrate that they are almost as good\nas sparse and extremely high-dimensional TF-IDF representation in spite of\nhaving less than 3% of the TF-IDF dimension. We also use a multimodal\nautoencoder for comparing products from different language-regions and show\npreliminary yet promising qualitative results." 
- pub_date: { - seconds: 1505952000 - } - authors: "Arijit Biswas" - authors: "Mukul Bhutani" - authors: "Subhajit Sanyal" - } - video: { - video_id: "cpCS7LBRkRU" - video_title: "PR-099: MRNet-Product2Vec" - number_of_likes: 23 - number_of_views: 1342 - published_date: { - seconds: 1531661636 - } - uploader: "keun bong Kwak" - } - } -} -pr_id_to_video: { - key: 100 - value: { - papers: { - paper_id: "f-brs-rethinking-backpropagating-refinement" - title: "f-BRS: Rethinking Backpropagating Refinement for Interactive Segmentation" - arxiv_id: "2001.10331" - abstract: "Deep neural networks have become a mainstream approach to interactive segmentation. As we show in our experiments, while for some images a trained network provides accurate segmentation result with just a few clicks, for some unknown objects it cannot achieve satisfactory result even with a large amount of user input. Recently proposed backpropagating refinement (BRS) scheme introduces an optimization problem for interactive segmentation that results in significantly better performance for the hard cases. At the same time, BRS requires running forward and backward pass through a deep network several times that leads to significantly increased computational budget per click compared to other methods. We propose f-BRS (feature backpropagating refinement scheme) that solves an optimization problem with respect to auxiliary variables instead of the network inputs, and requires running forward and backward pass just for a small part of a network. Experiments on GrabCut, Berkeley, DAVIS and SBD datasets set new state-of-the-art at an order of magnitude lower time per click compared to original BRS. The code and trained models are available at https://github.com/saic-vul/fbrs_interactive_segmentation ." - pub_date: { - seconds: 1580169600 - } - authors: "Konstantin Sofiiuk" - authors: "Ilia Petrov" - authors: "Olga Barinova" - authors: "Anton Konushin" - repositories: { - url: "https://github.com/jpconnel/fbrs-segmentation" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "f-brs segmentation modification for Hololens" - } - repositories: { - is_official: true - url: "https://github.com/saic-vul/fbrs_interactive_segmentation" - framework: FRAMEWORK_PYTORCH - number_of_stars: 400 - description: "[CVPR2020] f-BRS: Rethinking Backpropagating Refinement for Interactive Segmentation https://arxiv.org/abs/2001.10331" - } - methods: { - name: "Spatial Broadcast Decoder" - full_name: "Spatial Broadcast Decoder" - description: "Spatial Broadcast Decoder is an architecture that aims to improve disentangling, reconstruction accuracy, and generalization to held-out regions in data space. 
It provides a particularly dramatic\r\nbenefit when applied to datasets with small objects.\r\n\r\nSource: [Watters et al.](https://arxiv.org/pdf/1901.07017v2.pdf)\r\n\r\nImage source: [Watters et al.](https://arxiv.org/pdf/1901.07017v2.pdf)" - } - } - video: { - video_id: "ksTkCecBTCY" - video_title: "PR100: SeedNet" - number_of_likes: 11 - number_of_views: 1043 - published_date: { - seconds: 1532265921 - } - uploader: "이광희" - } - } -} -pr_id_to_video: { - key: 101 - value: { - papers: { - paper_id: "deep-feature-consistent-variational" - title: "Deep Feature Consistent Variational Autoencoder" - arxiv_id: "1610.00291" - abstract: "We present a novel method for constructing Variational Autoencoder (VAE).\nInstead of using pixel-by-pixel loss, we enforce deep feature consistency\nbetween the input and the output of a VAE, which ensures the VAE's output to\npreserve the spatial correlation characteristics of the input, thus leading the\noutput to have a more natural visual appearance and better perceptual quality.\nBased on recent deep learning works such as style transfer, we employ a\npre-trained deep convolutional neural network (CNN) and use its hidden features\nto define a feature perceptual loss for VAE training. Evaluated on the CelebA\nface dataset, we show that our model produces better results than other methods\nin the literature. We also show that our method can produce latent vectors that\ncan capture the semantic information of face expressions and can be used to\nachieve state-of-the-art performance in facial attribute prediction." - pub_date: { - seconds: 1475366400 - } - authors: "Xianxu Hou" - authors: "Linlin Shen" - authors: "Ke Sun" - authors: "Guoping Qiu" - repositories: { - url: "https://github.com/svenrdz/DFC-VAE" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11 - description: "Deep Feature Consistent Variational AutoEncoder (Pytorch)" - } - repositories: { - url: "https://github.com/nmichlo/disent" - framework: FRAMEWORK_PYTORCH - number_of_stars: 21 - description: "🧶 Modular VAE Disentanglement Framework built with PyTorch Lightning. Optionally configured and run with Hydra Config." 
- } - repositories: { - url: "https://github.com/bhpfelix/Variational-Autoencoder-PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 144 - description: "Variational Autoencoder implemented with PyTorch, Trained over CelebA Dataset" - } - repositories: { - url: "https://github.com/UdbhavPrasad072300/Generate-Fake-Faces-with-CVAE-in-PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Making fake faces with a Convolutional Variational Autoencoder in PyTorch with celebA dataset" - } - repositories: { - url: "https://github.com/bogedy/intro_dfc" - framework: FRAMEWORK_TENSORFLOW - description: "Introspective Deep Feature Consistent Variational Autoencoder" - } - repositories: { - url: "https://github.com/peria1/VAEconvMNIST" - framework: FRAMEWORK_PYTORCH - description: "Basic Pytorch VAE adapted to use conv2d on MNIST" - } - repositories: { - url: "https://github.com/Nanway/dfc-vae" - framework: FRAMEWORK_PYTORCH - description: "I turned my friends into dogs and made computer generated images of them with this deep feature consistent variational autoencoder" - } - repositories: { - url: "https://github.com/inkplatform/beta-vae" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/vinoth654321/Beta-Vae-face-dataset" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/matthew-liu/beta-vae" - framework: FRAMEWORK_PYTORCH - number_of_stars: 30 - description: "A Pytorch Implementation of the Beta-VAE" - } - methods: { - name: "VAE" - full_name: "Variational Autoencoder" - description: "A **Variational Autoencoder** is a type of likelihood-based generative model. It consists of an encoder, that takes in data $x$ as input and transforms this into a latent representation $z$, and a decoder, that takes a latent representation $z$ and returns a reconstruction $\\hat{x}$. Inference is performed via variational inference to approximate the posterior of the model." - } - methods: { - name: "AutoEncoder" - full_name: "AutoEncoder" - description: "An **Autoencoder** is a bottleneck architecture that turns a high-dimensional input into a latent low-dimensional code (encoder), and then performs a reconstruction of the input with this latent code (the decoder).\r\n\r\nImage: [Michael Massi](https://en.wikipedia.org/wiki/Autoencoder#/media/File:Autoencoder_schema.png)" - } - } - video: { - video_id: "FfBp6xJqZVA" - video_title: "PR-101: Deep Feature Consistent Variational Autoencoder" - number_of_likes: 34 - number_of_views: 9164 - published_date: { - seconds: 1536508427 - } - uploader: "MinGuk Kang" - } - } -} -pr_id_to_video: { - key: 102 - value: { - papers: { - paper_id: "everybody-dance-now" - title: "Everybody Dance Now" - arxiv_id: "1808.07371" - abstract: "This paper presents a simple method for \"do as I do\" motion transfer: given a source video of a person dancing, we can transfer that performance to a novel (amateur) target after only a few minutes of the target subject performing standard moves. We approach this problem as video-to-video translation using pose as an intermediate representation. To transfer the motion, we extract poses from the source subject and apply the learned pose-to-appearance mapping to generate the target subject. We predict two consecutive frames for temporally coherent video results and introduce a separate pipeline for realistic face synthesis. 
Although our method is quite simple, it produces surprisingly compelling results (see video). This motivates us to also provide a forensics tool for reliable synthetic content detection, which is able to distinguish videos synthesized by our system from real data. In addition, we release a first-of-its-kind open-source dataset of videos that can be legally used for training and motion transfer." - pub_date: { - seconds: 1534896000 - } - authors: "Caroline Chan" - authors: "Shiry Ginosar" - authors: "Tinghui Zhou" - authors: "Alexei A. Efros" - repositories: { - url: "https://github.com/justinjohn0306/EverybodyDanceNow-Colab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Motion Retargeting Video Subjects, Modified Colab Version by Justin John" - } - repositories: { - url: "https://github.com/j-void/ISL_v2v" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/rajatsahay/Pose2Pose" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Research Paper Implementation" - } - repositories: { - url: "https://github.com/martin220485/everybody_dance_now_pytorch" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/CNC-IISER-BHOPAL/Any-Body-Can-Dance" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - } - repositories: { - url: "https://github.com/aman-arya/Any-Body-Can-Dance" - framework: FRAMEWORK_PYTORCH - } - repositories: { - is_official: true - url: "https://github.com/carolineec/EverybodyDanceNow" - framework: FRAMEWORK_PYTORCH - number_of_stars: 411 - description: "Motion Retargeting Video Subjects" - } - repositories: { - url: "https://github.com/Lotayou/everybody_dance_now_pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 240 - description: "A PyTorch Implementation of \"Everybody Dance Now\" from Berkeley AI lab." - } - repositories: { - url: "https://github.com/wjy5446/pytorch-everybody-dance-now" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9 - description: ":dancer: Dance Now !!!" - } - repositories: { - url: "https://github.com/dakenan1/Everybody-Dance-Now" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Implementation of Everybody dance now via tensorflow" - } - } - video: { - video_id: "_onRnCb_h3Q" - video_title: "PR-102: Everybody Dance Now" - number_of_views: 1367 - published_date: { - seconds: 1536505303 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 103 - value: { - papers: { - paper_id: "an-analysis-of-the-t-sne-algorithm-for-data" - title: "An Analysis of the t-SNE Algorithm for Data Visualization" - arxiv_id: "1803.01768" - abstract: "A first line of attack in exploratory data analysis is data visualization,\ni.e., generating a 2-dimensional representation of data that makes clusters of\nsimilar points visually identifiable. Standard Johnson-Lindenstrauss\ndimensionality reduction does not produce data visualizations. The t-SNE\nheuristic of van der Maaten and Hinton, which is based on non-convex\noptimization, has become the de facto standard for visualization in a wide\nrange of applications.\n This work gives a formal framework for the problem of data visualization -\nfinding a 2-dimensional embedding of clusterable data that correctly separates\nindividual clusters to make them visually identifiable. 
We then give a rigorous\nanalysis of the performance of t-SNE under a natural, deterministic condition\non the \"ground-truth\" clusters (similar to conditions assumed in earlier\nanalyses of clustering) in the underlying data. These are the first provable\nguarantees on t-SNE for constructing good data visualizations.\n We show that our deterministic condition is satisfied by considerably general\nprobabilistic generative models for clusterable data such as mixtures of\nwell-separated log-concave distributions. Finally, we give theoretical evidence\nthat t-SNE provably succeeds in partially recovering cluster structure even\nwhen the above deterministic condition is not met." - pub_date: { - seconds: 1520208000 - } - authors: "Sanjeev Arora" - authors: "Wei Hu" - authors: "Pravesh K. Kothari" - methods: { - name: "LINE" - full_name: "Large-scale Information Network Embedding" - description: "LINE is a novel network embedding method which is suitable for arbitrary types of information networks: undirected, directed, and/or weighted. The method optimizes a carefully designed objective function that preserves both the local and global network structures.\r\n\r\nSource: [Tang et al.](https://arxiv.org/pdf/1503.03578v1.pdf)\r\n\r\nImage source: [Tang et al.](https://arxiv.org/pdf/1503.03578v1.pdf)" - } - } - video: { - video_id: "zpJwm7f7EXs" - video_title: "PR-103: Visualizing Data using t-SNE" - number_of_likes: 36 - number_of_views: 2872 - published_date: { - seconds: 1537108725 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 104 - value: { - papers: { - paper_id: "video-to-video-synthesis" - title: "Video-to-Video Synthesis" - arxiv_id: "1808.06601" - abstract: "We study the problem of video-to-video synthesis, whose goal is to learn a\nmapping function from an input source video (e.g., a sequence of semantic\nsegmentation masks) to an output photorealistic video that precisely depicts\nthe content of the source video. While its image counterpart, the\nimage-to-image synthesis problem, is a popular topic, the video-to-video\nsynthesis problem is less explored in the literature. Without understanding\ntemporal dynamics, directly applying existing image synthesis approaches to an\ninput video often results in temporally incoherent videos of low visual\nquality. In this paper, we propose a novel video-to-video synthesis approach\nunder the generative adversarial learning framework. Through carefully-designed\ngenerator and discriminator architectures, coupled with a spatio-temporal\nadversarial objective, we achieve high-resolution, photorealistic, temporally\ncoherent video results on a diverse set of input formats including segmentation\nmasks, sketches, and poses. Experiments on multiple benchmarks show the\nadvantage of our method compared to strong baselines. In particular, our model\nis capable of synthesizing 2K resolution videos of street scenes up to 30\nseconds long, which significantly advances the state-of-the-art of video\nsynthesis. Finally, we apply our approach to future video prediction,\noutperforming several state-of-the-art competing systems." 
- pub_date: { - seconds: 1534723200 - } - authors: "Ting-Chun Wang" - authors: "Ming-Yu Liu" - authors: "Jun-Yan Zhu" - authors: "Guilin Liu" - authors: "Andrew Tao" - authors: "Jan Kautz" - authors: "Bryan Catanzaro" - repositories: { - url: "https://github.com/play166/vid2vid" - framework: FRAMEWORK_PYTORCH - description: "make myself for building successful" - } - repositories: { - url: "https://github.com/MadRabbit-jt/vid2vid" - framework: FRAMEWORK_PYTORCH - description: "make myself for building successful" - } - repositories: { - url: "https://github.com/divyanshpuri02/divyansh.github.io" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/divyanshpuri02/Nvidia" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/BUTIYO/vid2vid-test" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/Sjunna9819/My-First-Project" - framework: FRAMEWORK_PYTORCH - } - repositories: { - is_official: true - url: "https://github.com/NVIDIA/vid2vid" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7747 - description: "Pytorch implementation of our method for high-resolution (e.g. 2048x1024) photorealistic video-to-video translation." - } - repositories: { - url: "https://github.com/eric-erki/vid2vid" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Pytorch implementation of our method for high-resolution (e.g. 2048x1024) photorealistic video-to-video translation." - } - repositories: { - url: "https://github.com/freedombenLiu/vid2vid" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/yawayo/vid2vid" - framework: FRAMEWORK_PYTORCH - } - } - video: { - video_id: "WxeeqxqnRyE" - video_title: "PR-104: Video-to-Video synthesis" - number_of_likes: 16 - number_of_views: 1607 - published_date: { - seconds: 1537107746 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 105 - value: { - papers: { - paper_id: "mnasnet-platform-aware-neural-architecture" - title: "MnasNet: Platform-Aware Neural Architecture Search for Mobile" - arxiv_id: "1807.11626" - abstract: "Designing convolutional neural networks (CNN) for mobile devices is challenging because mobile models need to be small and fast, yet still accurate. Although significant efforts have been dedicated to design and improve mobile CNNs on all dimensions, it is very difficult to manually balance these trade-offs when there are so many architectural possibilities to consider. In this paper, we propose an automated mobile neural architecture search (MNAS) approach, which explicitly incorporate model latency into the main objective so that the search can identify a model that achieves a good trade-off between accuracy and latency. Unlike previous work, where latency is considered via another, often inaccurate proxy (e.g., FLOPS), our approach directly measures real-world inference latency by executing the model on mobile phones. To further strike the right balance between flexibility and search space size, we propose a novel factorized hierarchical search space that encourages layer diversity throughout the network. Experimental results show that our approach consistently outperforms state-of-the-art mobile CNN models across multiple vision tasks. 
On the ImageNet classification task, our MnasNet achieves 75.2% top-1 accuracy with 78ms latency on a Pixel phone, which is 1.8x faster than MobileNetV2 [29] with 0.5% higher accuracy and 2.3x faster than NASNet [36] with 1.2% higher accuracy. Our MnasNet also achieves better mAP quality than MobileNets for COCO object detection. Code is at https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet" - pub_date: { - seconds: 1532995200 - } - authors: "Mingxing Tan" - authors: "Bo Chen" - authors: "Ruoming Pang" - authors: "Vijay Vasudevan" - authors: "Mark Sandler" - authors: "Andrew Howard" - authors: "Quoc V. Le" - repositories: { - url: "https://github.com/osmr/imgclsmob" - framework: FRAMEWORK_OTHERS - number_of_stars: 2200 - description: "Sandbox for training deep learning networks" - } - repositories: { - is_official: true - url: "https://github.com/tensorflow/tpu" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4338 - description: "Reference models and tools for Cloud TPUs." - } - repositories: { - url: "https://github.com/abhoi/Keras-MnasNet" - framework: FRAMEWORK_OTHERS - number_of_stars: 8 - description: "A Keras implementation of MnasNet" - } - repositories: { - url: "https://github.com/PotatoSpudowski/CactiNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Pytorch Implementation of a CNN similar to Google Brain's new EfficientNet from scratch to identify images of cactus🌵" - } - repositories: { - url: "https://github.com/mingxingtan/mnasnet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 26 - description: "MnasNet snapshot" - } - repositories: { - url: "https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4338 - description: "Reference models and tools for Cloud TPUs." - } - repositories: { - url: "https://github.com/nsarang/MnasNet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "A TensorFlow 2.0 implementation of MnasNet: Platform-Aware Neural Architecture Search for Mobile." - } - repositories: { - url: "https://github.com/cgebbe/kaggle_pku-autonomous-driving" - framework: FRAMEWORK_PYTORCH - description: "Code for kaggle competition https://www.kaggle.com/c/pku-autonomous-driving" - } - repositories: { - url: "https://github.com/rwightman/gen-efficientnet-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1342 - description: "Pretrained EfficientNet, EfficientNet-Lite, MixNet, MobileNetV3 / V2, MNASNet A1 and B1, FBNet, Single-Path NAS" - } - repositories: { - url: "https://github.com/azamatkhid/mnasnet-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Pytorch implementation of MnasNet-A1 & MnasNet-B1" - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. 
It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "LSTM" - full_name: "Long Short-Term Memory" - description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" - } - methods: { - name: "MnasNet" - full_name: "MnasNet" - description: "**MnasNet** is a type of convolutional neural network optimized for mobile devices that is discovered through mobile neural architecture search, which explicitly incorporates model latency into the main objective so that the search can identify a model that achieves a good trade-off between accuracy and latency. The main building block is an inverted residual block (from [MobileNetV2](https://paperswithcode.com/method/mobilenetv2))." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." 
- } - methods: { - name: "Random Horizontal Flip" - full_name: "Random Horizontal Flip" - description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "Depthwise Separable Convolution" - full_name: "Depthwise Separable Convolution" - description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" - } - methods: { - name: "RMSProp" - full_name: "RMSProp" - description: "**RMSProp** is an unpublished adaptive learning rate optimizer [proposed by Geoff Hinton](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf). The motivation is that the magnitude of gradients can differ for different weights, and can change during learning, making it hard to choose a single global learning rate. RMSProp tackles this by keeping a moving average of the squared gradient and adjusting the weight updates by this magnitude. The gradient updates are performed as:\r\n\r\n$$E\\left[g^{2}\\right]\\_{t} = \\gamma E\\left[g^{2}\\right]\\_{t-1} + \\left(1 - \\gamma\\right) g^{2}\\_{t}$$\r\n\r\n$$\\theta\\_{t+1} = \\theta\\_{t} - \\frac{\\eta}{\\sqrt{E\\left[g^{2}\\right]\\_{t} + \\epsilon}}g\\_{t}$$\r\n\r\nHinton suggests $\\gamma=0.9$, with a good default for $\\eta$ as $0.001$.\r\n\r\nImage: [Alec Radford](https://twitter.com/alecrad)" - } - methods: { - name: "Depthwise Convolution" - full_name: "Depthwise Convolution" - description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" - } - methods: { - name: "Tanh Activation" - full_name: "Tanh Activation" - description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. 
But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" - } - } - video: { - video_id: "4uDZxefPd-I" - video_title: "PR-105: MnasNet: Platform-Aware Neural Architecture Search for Mobile" - number_of_likes: 23 - number_of_views: 1974 - published_date: { - seconds: 1538623331 - } - uploader: "Taekmin Kim" - } - } -} -pr_id_to_video: { - key: 106 - value: { - papers: { - paper_id: "learning-to-explain-an-information-theoretic" - title: "Learning to Explain: An Information-Theoretic Perspective on Model Interpretation" - arxiv_id: "1802.07814" - abstract: "We introduce instancewise feature selection as a methodology for model\ninterpretation. Our method is based on learning a function to extract a subset\nof features that are most informative for each given example. This feature\nselector is trained to maximize the mutual information between selected\nfeatures and the response variable, where the conditional distribution of the\nresponse variable given the input is the model to be explained. We develop an\nefficient variational approximation to the mutual information, and show the\neffectiveness of our method on a variety of synthetic and real data sets using\nboth quantitative metrics and human evaluation." - pub_date: { - seconds: 1519171200 - } - authors: "Jianbo Chen" - authors: "Le Song" - authors: "Martin J. Wainwright" - authors: "Michael I. Jordan" - repositories: { - url: "https://github.com/vikua/l2x" - framework: FRAMEWORK_TENSORFLOW - description: "Experiments for implementation of \"Learning to Explain\" paper: https://arxiv.org/abs/1802.07814" - } - repositories: { - is_official: true - url: "https://github.com/Jianbo-Lab/L2X" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 89 - } - } - video: { - video_id: "id_CmUaTWpg" - video_title: "PR-106: Learning to Explain: An Information-Theoretic Perspective on Model Interpretation" - number_of_likes: 11 - number_of_views: 1138 - published_date: { - seconds: 1538321661 - } - uploader: "Jinsung Yoon" - } - } -} -pr_id_to_video: { - key: 107 - value: { - papers: { - paper_id: "image-inpainting-for-irregular-holes-using" - title: "Image Inpainting for Irregular Holes Using Partial Convolutions" - arxiv_id: "1804.07723" - abstract: "Existing deep learning based image inpainting methods use a standard\nconvolutional network over the corrupted image, using convolutional filter\nresponses conditioned on both valid pixels as well as the substitute values in\nthe masked holes (typically the mean value). This often leads to artifacts such\nas color discrepancy and blurriness. Post-processing is usually used to reduce\nsuch artifacts, but are expensive and may fail. We propose the use of partial\nconvolutions, where the convolution is masked and renormalized to be\nconditioned on only valid pixels. We further include a mechanism to\nautomatically generate an updated mask for the next layer as part of the\nforward pass. Our model outperforms other methods for irregular masks. We show\nqualitative and quantitative comparisons with other methods to validate our\napproach." - pub_date: { - seconds: 1524182400 - } - authors: "Guilin Liu" - authors: "Fitsum A. Reda" - authors: "Kevin J. 
Shih" - authors: "Ting-Chun Wang" - authors: "Andrew Tao" - authors: "Bryan Catanzaro" - repositories: { - url: "https://github.com/feixuetuba/inpating" - framework: FRAMEWORK_PYTORCH - description: "复现Image Inpainting for Irregular Holes Using Partial Convolutions" - } - repositories: { - url: "https://github.com/jshi31/edge-connect" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/ayulockin/deepimageinpainting" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 15 - description: "Deep Image Inpainting using UNET like Vanilla Autoencoder and Partial Convolution based Autoencoder. " - } - repositories: { - url: "https://github.com/KPMG-wiseuniv/AI" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "for AI" - } - repositories: { - url: "https://github.com/hiyaroy12/DFT_inpainting" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11 - description: "Image inpainting using frequency domain priors" - } - repositories: { - url: "https://github.com/yashk2000/SneakySketchers" - framework: FRAMEWORK_PYTORCH - number_of_stars: 14 - description: "A python desktop application that allows you to do image inpainting by directly drawing on it. " - } - repositories: { - url: "https://github.com/preeti-2810/object-removal" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/Maouriyan/inpainting_demo" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/chefpr7/Image-Inpainting-using-Partial-Convolutional-Layers" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/waallf/edge-connect-master" - framework: FRAMEWORK_PYTORCH - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - } - video: { - video_id: "BhZN6AqfylA" - video_title: "PR-107: Image Inpainting for Irregular Holes Using Partial Convolutions" - number_of_likes: 26 - number_of_views: 2714 - published_date: { - seconds: 1539060135 - } - uploader: "정지원" - } - } -} -pr_id_to_video: { - key: 108 - value: { - papers: { - paper_id: "mobilenetv2-inverted-residuals-and-linear" - title: "MobileNetV2: Inverted Residuals and Linear Bottlenecks" - arxiv_id: "1801.04381" - abstract: "In this paper we describe a new mobile architecture, MobileNetV2, that\nimproves the state of the art performance of mobile models on multiple tasks\nand benchmarks as well as across a spectrum of different model sizes. We also\ndescribe efficient ways of applying these mobile models to object detection in\na novel framework we call SSDLite. 
Additionally, we demonstrate how to build\nmobile semantic segmentation models through a reduced form of DeepLabv3 which\nwe call Mobile DeepLabv3.\n The MobileNetV2 architecture is based on an inverted residual structure where\nthe input and output of the residual block are thin bottleneck layers opposite\nto traditional residual models which use expanded representations in the input\nan MobileNetV2 uses lightweight depthwise convolutions to filter features in\nthe intermediate expansion layer. Additionally, we find that it is important to\nremove non-linearities in the narrow layers in order to maintain\nrepresentational power. We demonstrate that this improves performance and\nprovide an intuition that led to this design. Finally, our approach allows\ndecoupling of the input/output domains from the expressiveness of the\ntransformation, which provides a convenient framework for further analysis. We\nmeasure our performance on Imagenet classification, COCO object detection, VOC\nimage segmentation. We evaluate the trade-offs between accuracy, and number of\noperations measured by multiply-adds (MAdd), as well as the number of\nparameters" - pub_date: { - seconds: 1515801600 - } - authors: "Mark Sandler" - authors: "Andrew Howard" - authors: "Menglong Zhu" - authors: "Andrey Zhmoginov" - authors: "Liang-Chieh Chen" - repositories: { - url: "https://github.com/espressif/esp-who" - framework: FRAMEWORK_OTHERS - number_of_stars: 1057 - description: "Face detection and recognition framework" - } - repositories: { - url: "https://github.com/Gideon0805/Tensorflow_Model_Pruning" - framework: FRAMEWORK_TENSORFLOW - description: "Pruning for TF1.5" - } - repositories: { - url: "https://github.com/akrapukhin/MobileNetV3" - framework: FRAMEWORK_PYTORCH - description: "An implementation of the MobileNetV3 models in Pytorch with scripts for training, testing and measuring latency." - } - repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/deeplab" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70334 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/pytorch/vision" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9293 - description: "Datasets, Transforms and Models specific to Computer Vision" - } - repositories: { - url: "https://github.com/stevensmiley1989/MrRobot" - framework: FRAMEWORK_TENSORFLOW - description: "This is a robot I designed in Fusion 360 and 3D printed with my FlashForge Creator Pro in PLA, Main Hardware: 1 x Raspberry Pi 3b, 3 x Arduinos with I2C, 5 x ultrasonic sensors, 4 x 60Kg Servos, 4 x 12V 200rpm DC motors, 1 x stepper motor for loading ammo into custom built coil gun. The coil gun uses 2 x 450V 1000uF Capacitors in parallel with a boost converter, yielding 380V maximum charge discharge from a 12V input, firing with a 1.2kV maximum peak non-repetitive surge current 1.1kA rated Thyristor SCR, Main Software: Uses TensorFlow and Python for Object Detection with some C++ for motor controls. The model used is a retrained Single Shot Detection MobileNet V2 algorithm trained on a toy reindeer. Signal processing allows proportional controller feedback to adjust movement of the robot for moving, aiming, and shooting. An application for IOS was written in Swift to control the robot as well, using Mosquito MQTT Broker for communication. 
" - } - repositories: { - url: "https://github.com/d-li14/mobilenetv2.pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 395 - description: "72.8% MobileNetV2 1.0 model on ImageNet and a spectrum of pre-trained MobileNetV2 models" - } - repositories: { - url: "https://github.com/lpirola13/flower-recognizer" - framework: FRAMEWORK_TENSORFLOW - description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." - } - repositories: { - url: "https://github.com/clairehester/face-mask-detector" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "GA DSI Capstone project - Face Mask Detection using Computer Vision and Machine Learning" - } - repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/slim" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70334 - description: "Models and examples built with TensorFlow" - } - methods: { - name: "DeepLabv3" - full_name: "DeepLabv3" - description: "**DeepLabv3** is a semantic segmentation architecture that improves upon DeepLabv2 with several modifications. To handle the problem of segmenting objects at multiple scales, modules are designed which employ atrous convolution in cascade or in parallel to capture multi-scale context by adopting multiple atrous rates. Furthermore, the Atrous Spatial Pyramid Pooling module from DeepLabv2 augmented with image-level features encoding global context and further boost performance. \r\n\r\nThe changes to the ASSP module are that the authors apply global average pooling on the last feature map of the model, feed the resulting image-level features to a 1 × 1 convolution with 256 filters (and batch normalization), and then bilinearly upsample the feature to the desired spatial dimension. In the\r\nend, the improved ASPP consists of (a) one 1×1 convolution and three 3 × 3 convolutions with rates = (6, 12, 18) when output stride = 16 (all with 256 filters and batch normalization), and (b) the image-level features.\r\n\r\nAnother interesting difference is that DenseCRF post-processing from DeepLabv2 is no longer needed." - } - methods: { - name: "Dilated Convolution" - full_name: "Dilated Convolution" - description: "**Dilated Convolutions** are a type of convolution that “inflate” the kernel by inserting holes between the kernel elements. An additional parameter $l$ (dilation rate) indicates how much the kernel is widened. There are usually $l-1$ spaces inserted between kernel elements. \r\n\r\nNote that concept has existed in past literature under different names, for instance the *algorithme a trous*, an algorithm for wavelet decomposition (Holschneider et al., 1987; Shensa, 1992)." - } - methods: { - name: "ReLU6" - full_name: "ReLU6" - description: "**ReLU6** is a modification of the [rectified linear unit](https://paperswithcode.com/method/relu) where we limit the activation to a maximum size of $6$. This is due to increased robustness when used with low-precision computation.\r\n\r\nImage Credit: [PyTorch](https://pytorch.org/docs/master/generated/torch.nn.ReLU6.html)" - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "RMSProp" - full_name: "RMSProp" - description: "**RMSProp** is an unpublished adaptive learning rate optimizer [proposed by Geoff Hinton](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf). The motivation is that the magnitude of gradients can differ for different weights, and can change during learning, making it hard to choose a single global learning rate. RMSProp tackles this by keeping a moving average of the squared gradient and adjusting the weight updates by this magnitude. The gradient updates are performed as:\r\n\r\n$$E\\left[g^{2}\\right]\\_{t} = \\gamma E\\left[g^{2}\\right]\\_{t-1} + \\left(1 - \\gamma\\right) g^{2}\\_{t}$$\r\n\r\n$$\\theta\\_{t+1} = \\theta\\_{t} - \\frac{\\eta}{\\sqrt{E\\left[g^{2}\\right]\\_{t} + \\epsilon}}g\\_{t}$$\r\n\r\nHinton suggests $\\gamma=0.9$, with a good default for $\\eta$ as $0.001$.\r\n\r\nImage: [Alec Radford](https://twitter.com/alecrad)" - } - methods: { - name: "Depthwise Separable Convolution" - full_name: "Depthwise Separable Convolution" - description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" - } - methods: { - name: "ASPP" - full_name: "Atrous Spatial Pyramid Pooling" - description: "**Atrous Spatial Pyramid Pooling (ASSP)** is a semantic segmentation module for resampling a given feature layer at multiple rates prior to convolution. This amounts to probing the original image with multiple filters that have complementary effective fields of view, thus capturing objects as well as useful image context at multiple scales. Rather than actually resampling features, the mapping is implemented using multiple parallel atrous convolutional layers with different sampling rates." - } - methods: { - name: "Depthwise Convolution" - full_name: "Depthwise Convolution" - description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. 
We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "Pointwise Convolution" - full_name: "Pointwise Convolution" - description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" - } - } - video: { - video_id: "mT5Y-Zumbbw" - video_title: "PR-108: MobileNetV2: Inverted Residuals and Linear Bottlenecks" - number_of_likes: 67 - number_of_views: 8201 - published_date: { - seconds: 1540388729 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 109 - value: { - papers: { - paper_id: "large-scale-gan-training-for-high-fidelity" - title: "Large Scale GAN Training for High Fidelity Natural Image Synthesis" - arxiv_id: "1809.11096" - abstract: "Despite recent progress in generative image modeling, successfully generating\nhigh-resolution, diverse samples from complex datasets such as ImageNet remains\nan elusive goal. To this end, we train Generative Adversarial Networks at the\nlargest scale yet attempted, and study the instabilities specific to such\nscale. We find that applying orthogonal regularization to the generator renders\nit amenable to a simple \"truncation trick,\" allowing fine control over the\ntrade-off between sample fidelity and variety by reducing the variance of the\nGenerator's input. Our modifications lead to models which set the new state of\nthe art in class-conditional image synthesis. When trained on ImageNet at\n128x128 resolution, our models (BigGANs) achieve an Inception Score (IS) of\n166.5 and Frechet Inception Distance (FID) of 7.4, improving over the previous\nbest IS of 52.52 and FID of 18.6." 
- pub_date: { - seconds: 1538092800 - } - authors: "Andrew Brock" - authors: "Jeff Donahue" - authors: "Karen Simonyan" - repositories: { - url: "https://github.com/roberttwomey/machine-imagination-workshop" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "text to image notebook with CLIP for workshop on Machine Imagination, Spring 2021" - } - repositories: { - url: "https://github.com/notperquisites/bigsleep" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Personal Big Sleep AI Repo" - } - repositories: { - url: "https://github.com/lucidrains/big-sleep" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1146 - description: "A simple command line tool for text to image generation, using OpenAI's CLIP and a BigGAN. Technique was originally created by https://twitter.com/advadnoun" - } - repositories: { - url: "https://github.com/PacktPublishing/Hands-On-Image-Generation-with-TensorFlow-2.0/tree/master/Chapter08" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 62 - description: "Hands-On Image Generation with TensorFlow 2.0, published by Packt" - } - repositories: { - url: "https://github.com/yaxingwang/DeepI2I" - framework: FRAMEWORK_PYTORCH - number_of_stars: 18 - description: "Image-to-image translation, knowledge transfer" - } - repositories: { - url: "https://github.com/uoguelph-mlrg/instance_selection_for_gans" - framework: FRAMEWORK_PYTORCH - number_of_stars: 27 - description: "Official code repository for Instance Selection for GANs." - } - repositories: { - url: "https://github.com/minyoungg/pix2latent" - framework: FRAMEWORK_PYTORCH - number_of_stars: 151 - description: "Code for: Transforming and Projecting Images into Class-conditional Generative Networks" - } - repositories: { - url: "https://github.com/times2049/talkinghead" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/krisrjohnson/Realistic-Neural-Talking-Head-Models" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/amanjaiswal73892/changemypet" - framework: FRAMEWORK_PYTORCH - description: "Deep Learning Project" - } - methods: { - name: "GAN Hinge Loss" - full_name: "GAN Hinge Loss" - description: "The **GAN Hinge Loss** is a hinge loss based loss function for [generative adversarial networks](https://paperswithcode.com/methods/category/generative-adversarial-networks):\r\n\r\n$$ L\\_{D} = -\\mathbb{E}\\_{\\left(x, y\\right)\\sim{p}\\_{data}}\\left[\\min\\left(0, -1 + D\\left(x, y\\right)\\right)\\right] -\\mathbb{E}\\_{z\\sim{p\\_{z}}, y\\sim{p\\_{data}}}\\left[\\min\\left(0, -1 - D\\left(G\\left(z\\right), y\\right)\\right)\\right] $$\r\n\r\n$$ L\\_{G} = -\\mathbb{E}\\_{z\\sim{p\\_{z}}, y\\sim{p\\_{data}}}D\\left(G\\left(z\\right), y\\right) $$" - } - methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. 
The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." - } - methods: { - name: "Feedforward Network" - full_name: "Feedforward Network" - description: "A **Feedforward Network**, or a **Multilayer Perceptron (MLP)**, is a neural network with solely densely connected layers. This is the classic neural network architecture of the literature. It consists of inputs $x$ passed through units $h$ (of which there can be many layers) to predict a target $y$. Activation functions are generally chosen to be non-linear to allow for flexible functional approximation.\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "Spectral Normalization" - full_name: "Spectral Normalization" - description: "**Spectral Normalization** is a normalization technique used for generative adversarial networks, used to stabilize training of the discriminator. Spectral normalization has the convenient property that the Lipschitz constant is the only hyper-parameter to be tuned.\r\n\r\nIt controls the Lipschitz constant of the discriminator $f$ by constraining the spectral norm of each layer $g : \\textbf{h}\\_{in} \\rightarrow \\textbf{h}_{out}$. The Lipschitz norm $\\Vert{g}\\Vert\\_{\\text{Lip}}$ is equal to $\\sup\\_{\\textbf{h}}\\sigma\\left(\\nabla{g}\\left(\\textbf{h}\\right)\\right)$, where $\\sigma\\left(a\\right)$ is the spectral norm of the matrix $A$ ($L\\_{2}$ matrix norm of $A$):\r\n\r\n$$ \\sigma\\left(a\\right) = \\max\\_{\\textbf{h}:\\textbf{h}\\neq{0}}\\frac{\\Vert{A\\textbf{h}}\\Vert\\_{2}}{\\Vert\\textbf{h}\\Vert\\_{2}} = \\max\\_{\\Vert\\textbf{h}\\Vert\\_{2}\\leq{1}}{\\Vert{A\\textbf{h}}\\Vert\\_{2}} $$\r\n\r\nwhich is equivalent to the largest singular value of $A$. Therefore for a linear layer $g\\left(\\textbf{h}\\right) = W\\textbf{h}$ the norm is given by $\\Vert{g}\\Vert\\_{\\text{Lip}} = \\sup\\_{\\textbf{h}}\\sigma\\left(\\nabla{g}\\left(\\textbf{h}\\right)\\right) = \\sup\\_{\\textbf{h}}\\sigma\\left(W\\right) = \\sigma\\left(W\\right) $. Spectral normalization normalizes the spectral norm of the weight matrix $W$ so it satisfies the Lipschitz constraint $\\sigma\\left(W\\right) = 1$:\r\n\r\n$$ \\bar{W}\\_{\\text{SN}}\\left(W\\right) = W / \\sigma\\left(W\\right) $$" - } - methods: { - name: "Truncation Trick" - full_name: "Truncation Trick" - description: "The **Truncation Trick** is a latent sampling procedure for generative adversarial networks, where we sample $z$ from a truncated normal (where values which fall outside a range are resampled to fall inside that range). \r\nThe original implementation was in [Megapixel Size Image Creation with GAN](https://paperswithcode.com/paper/megapixel-size-image-creation-using).\r\nIn [BigGAN](http://paperswithcode.com/method/biggan), the authors find this provides a boost to the Inception Score and FID." 
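As a rough illustration of the truncation trick described just above, here is a minimal Go sketch that resamples standard-normal draws until every entry lies inside a fixed band; the 0.5 threshold and the fixed seed are arbitrary choices for the example, not values taken from the paper.

package main

import (
	"fmt"
	"math"
	"math/rand"
)

// truncatedNormal draws n samples from N(0, 1), resampling any value whose
// magnitude exceeds threshold, i.e. sampling from a truncated normal.
func truncatedNormal(n int, threshold float64, rng *rand.Rand) []float64 {
	z := make([]float64, n)
	for i := range z {
		v := rng.NormFloat64()
		for math.Abs(v) > threshold {
			v = rng.NormFloat64()
		}
		z[i] = v
	}
	return z
}

func main() {
	rng := rand.New(rand.NewSource(1))
	fmt.Println(truncatedNormal(5, 0.5, rng)) // latent vector fed to the generator
}

Lowering the threshold trades sample variety for fidelity, which is the trade-off the description above refers to.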
- } - methods: { - name: "Off-Diagonal Orthogonal Regularization" - full_name: "Off-Diagonal Orthogonal Regularization" - description: "**Off-Diagonal Orthogonal Regularization** is a modified form of [orthogonal regularization](https://paperswithcode.com/method/orthogonal-regularization) originally used in [BigGAN](https://paperswithcode.com/method/biggan). The original orthogonal regularization is known to be limiting so the authors explore several variants designed to relax the constraint while still imparting the desired smoothness to the models. They opt for a modification where they remove diagonal terms from the regularization, and aim to minimize the pairwise cosine similarity between filters but does not constrain their norm:\r\n\r\n$$ R\\_{\\beta}\\left(W\\right) = \\beta|| W^{T}W \\odot \\left(\\mathbf{1}-I\\right) ||^{2}\\_{F} $$\r\n\r\nwhere $\\mathbf{1}$ denotes a matrix with all elements set to 1. The authors sweep $\\beta$ values and select $10^{−4}$." - } - methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "TTUR" - full_name: "Two Time-scale Update Rule" - description: "The **Two Time-scale Update Rule (TTUR)** is an update rule for generative adversarial networks trained with stochastic gradient descent. TTUR has an individual learning rate for both the discriminator and the generator. The main premise is that the discriminator converges to a local minimum when the generator is fixed. If the generator changes slowly enough, then the discriminator still converges, since the generator perturbations are small. Besides ensuring convergence, the performance may also improve since the discriminator must first learn new patterns before they are transferred to the generator. 
In contrast, a generator which is overly fast, drives the discriminator steadily into new regions without capturing its gathered information." - } - methods: { - name: "Conditional Batch Normalization" - full_name: "Conditional Batch Normalization" - description: "**Conditional Batch Normalization (CBN)** is a class-conditional variant of [batch normalization](https://paperswithcode.com/method/batch-normalization). The key idea is to predict the $\\gamma$ and $\\beta$ of the batch normalization from an embedding - e.g. a language embedding in VQA. CBN enables the linguistic embedding to manipulate entire feature maps by scaling them up or down, negating them, or shutting them off. CBN has also been used in [GANs](https://paperswithcode.com/methods/category/generative-adversarial-networks) to allow class information to affect the batch normalization parameters.\r\n\r\nConsider a single convolutional layer with batch normalization module $\\text{BN}\\left(F\\_{i,c,h,w}|\\gamma\\_{c}, \\beta\\_{c}\\right)$ for which pretrained scalars $\\gamma\\_{c}$ and $\\beta\\_{c}$ are available. We would like to directly predict these affine scaling parameters from, e.g., a language embedding $\\mathbf{e\\_{q}}$. When starting the training procedure, these parameters must be close to the pretrained values to recover the original ResNet model as a poor initialization could significantly deteriorate performance. Unfortunately, it is difficult to initialize a network to output the pretrained $\\gamma$ and $\\beta$. For these reasons, the authors propose to predict a change $\\delta\\beta\\_{c}$ and $\\delta\\gamma\\_{c}$ on the frozen original scalars, for which it is straightforward to initialize a neural network to produce an output with zero-mean and small variance.\r\n\r\nThe authors use a one-hidden-layer MLP to predict these deltas from a question embedding $\\mathbf{e\\_{q}}$ for all feature maps within the layer:\r\n\r\n$$\\Delta\\beta = \\text{MLP}\\left(\\mathbf{e\\_{q}}\\right)$$\r\n\r\n$$\\Delta\\gamma = \\text{MLP}\\left(\\mathbf{e\\_{q}}\\right)$$\r\n\r\nSo, given a feature map with $C$ channels, these MLPs output a vector of size $C$. We then add these predictions to the $\\beta$ and $\\gamma$ parameters:\r\n\r\n$$ \\hat{\\beta}\\_{c} = \\beta\\_{c} + \\Delta\\beta\\_{c} $$\r\n\r\n$$ \\hat{\\gamma}\\_{c} = \\gamma\\_{c} + \\Delta\\gamma\\_{c} $$\r\n\r\nFinally, these updated $\\hat{β}$ and $\\hat{\\gamma}$ are used as parameters for the batch normalization: $\\text{BN}\\left(F\\_{i,c,h,w}|\\hat{\\gamma\\_{c}}, \\hat{\\beta\\_{c}}\\right)$. The authors freeze all ResNet parameters, including $\\gamma$ and $\\beta$, during training. A ResNet consists of\r\nfour stages of computation, each subdivided in several residual blocks. In each block, the authors apply CBN to the three convolutional layers." - } - } - video: { - video_id: "1f0faOeqDQ0" - video_title: "PR-109: Large Scale GAN Training for High Fidelity Natural Image Synthesis" - number_of_likes: 12 - number_of_views: 1205 - published_date: { - seconds: 1539797131 - } - uploader: "Young Seok Kim" - } - } -} -pr_id_to_video: { - key: 110 - value: { - papers: { - paper_id: "an-analysis-of-scale-invariance-in-object-1" - title: "An Analysis of Scale Invariance in Object Detection - SNIP" - arxiv_id: "1711.08189" - abstract: "An analysis of different techniques for recognizing and detecting objects\nunder extreme scale variation is presented. 
Scale specific and scale invariant\ndesign of detectors are compared by training them with different configurations\nof input data. By evaluating the performance of different network architectures\nfor classifying small objects on ImageNet, we show that CNNs are not robust to\nchanges in scale. Based on this analysis, we propose to train and test\ndetectors on the same scales of an image-pyramid. Since small and large objects\nare difficult to recognize at smaller and larger scales respectively, we\npresent a novel training scheme called Scale Normalization for Image Pyramids\n(SNIP) which selectively back-propagates the gradients of object instances of\ndifferent sizes as a function of the image scale. On the COCO dataset, our\nsingle model performance is 45.7% and an ensemble of 3 networks obtains an mAP\nof 48.3%. We use off-the-shelf ImageNet-1000 pre-trained models and only train\nwith bounding box supervision. Our submission won the Best Student Entry in the\nCOCO 2017 challenge. Code will be made available at\n\\url{http://bit.ly/2yXVg4c}." - pub_date: { - seconds: 1511308800 - } - authors: "Bharat Singh" - authors: "Larry S. Davis" - methods: { - name: "ResNet" - full_name: "Residual Network" - description: "**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. Instead of hoping each few stacked layers directly fit a desired underlying mapping, residual nets let these layers fit a residual mapping. They stack [residual blocks](https://paperswithcode.com/method/residual-block) ontop of each other to form network: e.g. a ResNet-50 has fifty layers using these blocks. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}(x)$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}(x):=\\mathcal{H}(x)-x$. The original mapping is recast into $\\mathcal{F}(x)+x$.\r\n\r\nThere is empirical evidence that these types of network are easier to optimize, and can gain accuracy from considerably increased depth." - } - methods: { - name: "DPN" - full_name: "Dual Path Network" - description: "A **Dual Path Network (DPN)** is a convolutional neural network which presents a new topology of connection paths internally. The intuition is that [ResNets](https://paperswithcode.com/method/resnet) enables feature re-usage while DenseNet enables new feature exploration, and both are important for learning good representations. To enjoy the benefits from both path topologies, Dual Path Networks share common features while maintaining the flexibility to explore new features through dual path architectures. \r\n\r\nWe formulate such a dual path architecture as follows:\r\n\r\n$$x^{k} = \\sum\\limits\\_{t=1}^{k-1} f\\_t^{k}(h^t) \\text{,} $$\r\n\r\n$$\r\ny^{k} = \\sum\\limits\\_{t=1}^{k-1} v\\_t(h^t) = y^{k-1} + \\phi^{k-1}(y^{k-1}) \\text{,} \\\\\\\\\r\n$$\r\n\r\n$$\r\nr^{k} = x^{k} + y^{k} \\text{,} \\\\\\\\\r\n$$\r\n\r\n$$\r\nh^k = g^k \\left( r^{k} \\right) \\text{,}\r\n$$\r\n\r\nwhere $x^{k}$ and $y^{k}$ denote the extracted information at $k$-th step from individual path, $v_t(\\cdot)$ is a feature learning function as $f_t^k(\\cdot)$. The first equation refers to the densely connected path that enables exploring new features. The second equation refers to the residual path that enables common features re-usage. The third equation defines the dual path that integrates them and feeds them to the last transformation function in the last equation." 
- } - methods: { - name: "RPN" - full_name: "Region Proposal Network" - description: "A **Region Proposal Network**, or **RPN**, is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals. RPN and algorithms like Fast R-CNN can be merged into a single network by sharing their convolutional features - using the recently popular terminology of neural networks with attention mechanisms, the RPN component tells the unified network where to look.\r\n\r\nRPNs are designed to efficiently predict region proposals with a wide range of scales and aspect ratios. RPNs use anchor boxes that serve as references at multiple scales and aspect ratios. The scheme can be thought of as a pyramid of regression references, which avoids enumerating images or filters of multiple scales or aspect ratios." - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." 
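The global average pooling operation described above collapses each feature map to a single value; the following Go sketch performs exactly that reduction on an [N][C][H][W] tensor (the 2x2 example input is made up for illustration).

package main

import "fmt"

// globalAveragePool averages every HxW feature map, turning an [N][C][H][W]
// tensor into one confidence value per channel, shape [N][C].
func globalAveragePool(x [][][][]float64) [][]float64 {
	out := make([][]float64, len(x))
	for n := range x {
		out[n] = make([]float64, len(x[n]))
		for c := range x[n] {
			sum, count := 0.0, 0
			for _, row := range x[n][c] {
				for _, v := range row {
					sum += v
					count++
				}
			}
			out[n][c] = sum / float64(count)
		}
	}
	return out
}

func main() {
	x := [][][][]float64{{
		{{1, 2}, {3, 4}},   // channel 0 -> mean 2.5
		{{0, 0}, {10, 10}}, // channel 1 -> mean 5
	}}
	fmt.Println(globalAveragePool(x)) // [[2.5 5]]
}

Because the output has one entry per channel, the pooled vector can be fed straight into a softmax with no fully connected layer, which is the parameter saving the description points out.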
- } - methods: { - name: "Concatenated Skip Connection" - full_name: "Concatenated Skip Connection" - description: "A **Concatenated Skip Connection** is a type of skip connection that seeks to reuse features by concatenating them to new layers, allowing more information to be retained from previous layers of the network. This contrasts with say, residual connections, where element-wise summation is used instead to incorporate information from previous layers. This type of skip connection is prominently used in DenseNets (and also Inception networks), which the Figure to the right illustrates." - } - methods: { - name: "Position-Sensitive RoI Pooling" - full_name: "Position-Sensitive RoI Pooling" - description: "**Position-Sensitive RoI Pooling layer** aggregates the outputs of the last convolutional layer and generates scores for each RoI. Unlike [RoI Pooling](https://paperswithcode.com/method/roi-pooling), PS RoI Pooling conducts selective pooling, and each of the $k$ × $k$ bin aggregates responses from only one score map out of the bank of $k$ × $k$ score maps. With end-to-end training, this RoI layer shepherds the last convolutional layer to learn specialized position-sensitive score maps." - } - methods: { - name: "Deformable Convolution" - full_name: "Deformable Convolution" - description: "**Deformable convolutions** add 2D offsets to the regular grid sampling locations in the standard convolution. It enables free form deformation of the sampling grid. The offsets are learned from the preceding feature maps, via additional convolutional layers. Thus, the deformation is conditioned on the input features in a local, dense, and adaptive manner." - } - methods: { - name: "Grouped Convolution" - full_name: "Grouped Convolution" - description: "A **Grouped Convolution** uses a group of convolutions - multiple kernels per layer - resulting in multiple channel outputs per layer. This leads to wider networks helping a network learn a varied set of low level and high level features. The original motivation of using Grouped Convolutions in [AlexNet](https://paperswithcode.com/method/alexnet) was to distribute the model over multiple GPUs as an engineering compromise. But later, with models such as [ResNeXt](https://paperswithcode.com/method/alexnet), it was shown this module could be used to improve classification accuracy. Specifically by exposing a new dimension through grouped convolutions, *cardinality* (the size of set of transformations), we can increase accuracy by increasing it." - } - } - video: { - video_id: "nimHWHxjBJ8" - video_title: "PR-110: An Analysis of Scale Invariance in Object Detection – SNIP" - number_of_likes: 14 - number_of_views: 1225 - published_date: { - seconds: 1540590955 - } - uploader: "visionNoob" - } - } -} -pr_id_to_video: { - key: 111 - value: { - papers: { - paper_id: "eva2-exploiting-temporal-redundancy-in-live" - title: "EVA$^2$: Exploiting Temporal Redundancy in Live Computer Vision" - arxiv_id: "1803.06312" - abstract: "Hardware support for deep convolutional neural networks (CNNs) is critical to\nadvanced computer vision in mobile and embedded devices. Current designs,\nhowever, accelerate generic CNNs; they do not exploit the unique\ncharacteristics of real-time vision. We propose to use the temporal redundancy\nin natural video to avoid unnecessary computation on most frames. A new\nalgorithm, activation motion compensation, detects changes in the visual input\nand incrementally updates a previously-computed output. 
The technique takes\ninspiration from video compression and applies well-known motion estimation\ntechniques to adapt to visual changes. We use an adaptive key frame rate to\ncontrol the trade-off between efficiency and vision quality as the input\nchanges. We implement the technique in hardware as an extension to existing\nstate-of-the-art CNN accelerator designs. The new unit reduces the average\nenergy per frame by 54.2%, 61.7%, and 87.6% for three CNNs with less than 1%\nloss in vision accuracy." - pub_date: { - seconds: 1521158400 - } - authors: "Mark Buckler" - authors: "Philip Bedoukian" - authors: "Suren Jayasuriya" - authors: "Adrian Sampson" - } - video: { - video_id: "uwRz7PjVtB0" - video_title: "PR-111: EVA2:Exploiting Temporal Redundancy in Live Computer Vision" - number_of_likes: 23 - number_of_views: 798 - published_date: { - seconds: 1540137553 - } - uploader: "Soyeon Kim" - } - } -} -pr_id_to_video: { - key: 112 - value: { - papers: { - paper_id: "a-tutorial-on-independent-component-analysis" - title: "A Tutorial on Independent Component Analysis" - arxiv_id: "1404.2986" - abstract: "Independent component analysis (ICA) has become a standard data analysis\ntechnique applied to an array of problems in signal processing and machine\nlearning. This tutorial provides an introduction to ICA based on linear algebra\nformulating an intuition for ICA from first principles. The goal of this\ntutorial is to provide a solid foundation on this advanced topic so that one\nmight learn the motivation behind ICA, learn why and when to apply this\ntechnique and in the process gain an introduction to this exciting field of\nactive research." - pub_date: { - seconds: 1397174400 - } - authors: "Jonathon Shlens" - repositories: { - url: "https://github.com/VU-BEAM-Lab/ADMIRE" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - repositories: { - url: "https://github.com/bhaskar-agrawal/Independent-component-analysis" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/petteriTeikari/mixedImageSeparation" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "with FastICA (and icasso for robustness)" - } - methods: { - name: "ICA" - full_name: "Independent Component Analysis" - description: "_**Independent component analysis** (ICA) is a statistical and computational technique for revealing hidden factors that underlie sets of random variables, measurements, or signals._\r\n\r\n_ICA defines a generative model for the observed multivariate data, which is typically given as a large database of samples. In the model, the data variables are assumed to be linear mixtures of some unknown latent variables, and the mixing system is also unknown. The latent variables are assumed nongaussian and mutually independent, and they are called the independent components of the observed data. These independent components, also called sources or factors, can be found by ICA._\r\n\r\n_ICA is superficially related to principal component analysis and factor analysis. 
ICA is a much more powerful technique, however, capable of finding the underlying factors or sources when these classic methods fail completely._\r\n\r\n\r\nExtracted from (https://www.cs.helsinki.fi/u/ahyvarin/whatisica.shtml)\r\n\r\n**Source papers**:\r\n\r\n[Blind separation of sources, part I: An adaptive algorithm based on neuromimetic architecture](https://doi.org/10.1016/0165-1684(91)90079-X)\r\n\r\n[Independent component analysis, A new concept?](https://doi.org/10.1016/0165-1684(94)90029-9)\r\n\r\n[Independent component analysis: algorithms and applications](https://doi.org/10.1016/S0893-6080(00)00026-5)" - } - } - video: { - video_id: "mLSPA76qSuU" - } - } -} -pr_id_to_video: { - key: 113 - value: { - papers: { - paper_id: "the-perception-distortion-tradeoff" - title: "The Perception-Distortion Tradeoff" - arxiv_id: "1711.06077" - abstract: "Image restoration algorithms are typically evaluated by some distortion measure (e.g. PSNR, SSIM, IFC, VIF) or by human opinion scores that quantify perceived perceptual quality. In this paper, we prove mathematically that distortion and perceptual quality are at odds with each other. Specifically, we study the optimal probability for correctly discriminating the outputs of an image restoration algorithm from real images. We show that as the mean distortion decreases, this probability must increase (indicating worse perceptual quality). As opposed to the common belief, this result holds true for any distortion measure, and is not only a problem of the PSNR or SSIM criteria. We also show that generative-adversarial-nets (GANs) provide a principled way to approach the perception-distortion bound. This constitutes theoretical support to their observed success in low-level vision tasks. Based on our analysis, we propose a new methodology for evaluating image restoration methods, and use it to perform an extensive comparison between recent super-resolution algorithms." - pub_date: { - seconds: 1510790400 - } - authors: "Yochai Blau" - authors: "Tomer Michaeli" - repositories: { - url: "https://github.com/roimehrez/PIRM2018" - framework: FRAMEWORK_OTHERS - number_of_stars: 186 - description: "Workshop and Challenge on Perceptual Image Restoration and Manipulation" - } - } - video: { - video_id: "6Yid4dituqo" - video_title: "PR-113: The Perception Distortion Tradeoff" - number_of_likes: 16 - number_of_views: 1363 - published_date: { - seconds: 1540734798 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 114 - value: { - papers: { - paper_id: "recycle-gan-unsupervised-video-retargeting" - title: "Recycle-GAN: Unsupervised Video Retargeting" - arxiv_id: "1808.05174" - abstract: "We introduce a data-driven approach for unsupervised video retargeting that\ntranslates content from one domain to another while preserving the style native\nto a domain, i.e., if contents of John Oliver's speech were to be transferred\nto Stephen Colbert, then the generated content/speech should be in Stephen\nColbert's style. Our approach combines both spatial and temporal information\nalong with adversarial losses for content translation and style preservation.\nIn this work, we first study the advantages of using spatiotemporal constraints\nover spatial constraints for effective retargeting. We then demonstrate the\nproposed approach for the problems where information in both space and time\nmatters such as face-to-face translation, flower-to-flower, wind and cloud\nsynthesis, sunrise and sunset." 
- pub_date: { - seconds: 1534291200 - } - authors: "Aayush Bansal" - authors: "Shugao Ma" - authors: "Deva Ramanan" - authors: "Yaser Sheikh" - repositories: { - url: "https://github.com/aayushbansal/Recycle-GAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 375 - description: "Unsupervised Video Retargeting (e.g. face to face, flower to flower, clouds and winds, sunrise and sunset)" - } - } - video: { - video_id: "eMZXUqmp_PU" - video_title: "PR-114: Recycle-GAN, Unsupervised Video Retargeting" - number_of_views: 1172 - published_date: { - seconds: 1540738223 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 115 - value: { - papers: { - paper_id: "unsupervised-anomaly-detection-with" - title: "Unsupervised Anomaly Detection with Generative Adversarial Networks to Guide Marker Discovery" - arxiv_id: "1703.05921" - abstract: "Obtaining models that capture imaging markers relevant for disease\nprogression and treatment monitoring is challenging. Models are typically based\non large amounts of data with annotated examples of known markers aiming at\nautomating detection. High annotation effort and the limitation to a vocabulary\nof known markers limit the power of such approaches. Here, we perform\nunsupervised learning to identify anomalies in imaging data as candidates for\nmarkers. We propose AnoGAN, a deep convolutional generative adversarial network\nto learn a manifold of normal anatomical variability, accompanying a novel\nanomaly scoring scheme based on the mapping from image space to a latent space.\nApplied to new data, the model labels anomalies, and scores image patches\nindicating their fit into the learned distribution. Results on optical\ncoherence tomography images of the retina demonstrate that the approach\ncorrectly identifies anomalous images, such as images containing retinal fluid\nor hyperreflective foci." - pub_date: { - seconds: 1489708800 - } - authors: "Thomas Schlegl" - authors: "Philipp Seeböck" - authors: "Sebastian M. 
Waldstein" - authors: "Ursula Schmidt-Erfurth" - authors: "Georg Langs" - repositories: { - url: "https://github.com/YeongHyeon/f-AnoGAN-TF" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "TensorFlow implementation of f-AnoGAN (with MNIST dataset)" - } - repositories: { - url: "https://github.com/xtarx/Unsupervised-Anomaly-Detection-with-Generative-Adversarial-Networks" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 93 - description: "Unsupervised Anomaly Detection with Generative Adversarial Networks on MIAS dataset" - } - repositories: { - url: "https://github.com/NMADALI97/Learning-With-Wasserstein-Loss" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/seungjunlee96/AnoGAN-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Pytorch implementation of \"Unsupervised Anomaly Detection with Generative Adversarial Networks to Guide Marker Discovery\"" - } - repositories: { - url: "https://github.com/mullue/anogan-mnist" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/fuchami/ANOGAN" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "anomaly detection using GAN" - } - repositories: { - url: "https://github.com/kosyoshida/simple-keras" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - repositories: { - url: "https://github.com/LeeDoYup/AnoGAN" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 228 - description: "Unofficial Tensorflow Implementation of AnoGAN (Anomaly GAN)" - } - repositories: { - url: "https://github.com/tkwoo/anogan-keras" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 164 - description: "Unsupervised anomaly detection with generative model, keras implementation" - } - repositories: { - url: "https://github.com/Dai7Igarashi/Anomaly-Detection" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - } - } - video: { - video_id: "R0H0gqtnMyA" - video_title: "PR-115: Unsupervised Anomaly Detection with Generative Adversarial Networks" - number_of_likes: 35 - number_of_views: 3270 - published_date: { - seconds: 1541343064 - } - uploader: "MinGuk Kang" - } - } -} -pr_id_to_video: { - key: 116 - value: { - papers: { - paper_id: "glow-generative-flow-with-invertible-1x1" - title: "Glow: Generative Flow with Invertible 1x1 Convolutions" - arxiv_id: "1807.03039" - abstract: "Flow-based generative models (Dinh et al., 2014) are conceptually attractive\ndue to tractability of the exact log-likelihood, tractability of exact\nlatent-variable inference, and parallelizability of both training and\nsynthesis. In this paper we propose Glow, a simple type of generative flow\nusing an invertible 1x1 convolution. Using our method we demonstrate a\nsignificant improvement in log-likelihood on standard benchmarks. Perhaps most\nstrikingly, we demonstrate that a generative model optimized towards the plain\nlog-likelihood objective is capable of efficient realistic-looking synthesis\nand manipulation of large images. The code for our model is available at\nhttps://github.com/openai/glow" - pub_date: { - seconds: 1531094400 - } - authors: "Diederik P. 
Kingma" - authors: "Prafulla Dhariwal" - repositories: { - url: "https://github.com/Naagar/Glow_NormalizingFlow_implimentation" - framework: FRAMEWORK_PYTORCH - description: "pyTorch implimentation of the Glow paper and Reimplementations of density estimation algorithms" - } - repositories: { - url: "https://github.com/Zhangyanbo/iResNetLab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "A python/pytorch package for invertible neural networks" - } - repositories: { - url: "https://github.com/Daniel-H-99/CRD" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/simonwestberg/Glow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "A replication of \"Glow: Generative Flow with Invertible 1×1 Convolutions\" and an investigation of its performance on Out-of-Distribution detection " - } - repositories: { - url: "https://github.com/simonwestberg/DD2412-Glow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "A replication of \"Glow: Generative Flow with Invertible 1×1 Convolutions\" and an investigation of its performance on Out-of-Distribution detection " - } - repositories: { - url: "https://github.com/samuelmat19/GLOW-tf2" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Glow: Generative Flow with Invertible 1x1 Convolutions in Tensorflow 2" - } - repositories: { - url: "https://github.com/L0SG/NanoFlow" - framework: FRAMEWORK_PYTORCH - number_of_stars: 60 - description: "PyTorch implementation of the paper \"NanoFlow: Scalable Normalizing Flows with Sublinear Parameter Complexity.\"" - } - repositories: { - url: "https://github.com/eyalbetzalel/GLOW2" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/ClaraBing/flow" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/rhychen/Glow" - framework: FRAMEWORK_PYTORCH - } - methods: { - name: "GLOW" - full_name: "GLOW" - description: "**GLOW** is a type of flow-based generative model that is based on an invertible $1 \\times 1$ convolution. This builds on the flows introduced by [NICE](https://paperswithcode.com/method/nice) and [RealNVP](https://paperswithcode.com/method/realnvp). It consists of a series of steps of flow, combined in a multi-scale architecture; see the Figure to the right. Each step of flow consists of Act Normalization followed by an *invertible $1 \\times 1$ convolution* followed by an affine coupling layer." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Normalizing Flows" - full_name: "Normalizing Flows" - description: "**Normalizing Flows** are a method for constructing complex distributions by transforming a\r\nprobability density through a series of invertible mappings. 
By repeatedly applying the rule for change of variables, the initial density ‘flows’ through the sequence of invertible mappings. At the end of this sequence we obtain a valid probability distribution and hence this type of flow is referred to as a normalizing flow.\r\n\r\nIn the case of finite flows, the basic rule for the transformation of densities considers an invertible, smooth mapping $f : \\mathbb{R}^{d} \\rightarrow \\mathbb{R}^{d}$ with inverse $f^{-1} = g$, i.e. the composition $g \\cdot f\\left(z\\right) = z$. If we use this mapping to transform a random variable $z$ with distribution $q\\left(z\\right)$, the resulting random variable $z' = f\\left(z\\right)$ has a distribution:\r\n\r\n$$ q\\left(\\mathbf{z}'\\right) = q\\left(\\mathbf{z}\\right)\\bigl\\vert{\\text{det}}\\frac{\\delta{f}^{-1}}{\\delta{\\mathbf{z'}}}\\bigr\\vert = q\\left(\\mathbf{z}\\right)\\bigl\\vert{\\text{det}}\\frac{\\delta{f}}{\\delta{\\mathbf{z}}}\\bigr\\vert ^{-1} $$\r\n\x0c\r\nwhere the last equality can be seen by applying the chain rule (inverse function theorem) and is a property of Jacobians of invertible functions. We can construct arbitrarily complex densities by composing several simple maps and successively applying the above equation. The density $q\\_{K}\\left(\\mathbf{z}\\right)$ obtained by successively transforming a random variable $z\\_{0}$ with distribution $q\\_{0}$ through a chain of $K$ transformations $f\\_{k}$ is:\r\n\r\n$$ z\\_{K} = f\\_{K} \\cdot \\dots \\cdot f\\_{2} \\cdot f\\_{1}\\left(z\\_{0}\\right) $$\r\n\r\n$$ \\ln{q}\\_{K}\\left(z\\_{K}\\right) = \\ln{q}\\_{0}\\left(z\\_{0}\\right) − \\sum^{K}\\_{k=1}\\ln\\vert\\det\\frac{\\delta{f\\_{k}}}{\\delta{\\mathbf{z\\_{k-1}}}}\\vert $$\r\n\x0c\r\nThe path traversed by the random variables $z\\_{k} = f\\_{k}\\left(z\\_{k-1}\\right)$ with initial distribution $q\\_{0}\\left(z\\_{0}\\right)$ is called the flow and the path formed by the successive distributions $q\\_{k}$ is a normalizing flow." - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "Affine Coupling" - full_name: "Affine Coupling" - description: "**Affine Coupling** is a method for implementing a normalizing flow (where we stack a sequence of invertible bijective transformation functions). Affine coupling is one of these bijective transformation functions. Specifically, it is an example of a reversible transformation where the forward function, the reverse function and the log-determinant are computationally efficient. For the forward function, we split the input dimension into two parts:\r\n\r\n$$ \\mathbf{x}\\_{a}, \\mathbf{x}\\_{b} = \\text{split}\\left(\\mathbf{x}\\right) $$\r\n\r\nThe second part stays the same $\\mathbf{x}\\_{b} = \\mathbf{y}\\_{b}$, while the first part $\\mathbf{x}\\_{a}$ undergoes an affine transformation, where the parameters for this transformation are learnt using the second part $\\mathbf{x}\\_{b}$ being put through a neural network. 
Together we have:\r\n\r\n$$ \\left(\\log{\\mathbf{s}, \\mathbf{t}}\\right) = \\text{NN}\\left(\\mathbf{x}\\_{b}\\right) $$\r\n\r\n$$ \\mathbf{s} = \\exp\\left(\\log{\\mathbf{s}}\\right) $$\r\n\r\n$$ \\mathbf{y}\\_{a} = \\mathbf{s} \\odot \\mathbf{x}\\_{a} + \\mathbf{t} $$\r\n\r\n$$ \\mathbf{y}\\_{b} = \\mathbf{x}\\_{b} $$\r\n\r\n$$ \\mathbf{y} = \\text{concat}\\left(\\mathbf{y}\\_{a}, \\mathbf{y}\\_{b}\\right) $$\r\n\r\nImage: [GLOW](https://arxiv.org/pdf/1807.03039.pdf)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "Invertible 1x1 Convolution" - full_name: "Invertible 1x1 Convolution" - description: "The **Invertible 1x1 Convolution** is a type of convolution used in flow-based generative models that reverses the ordering of channels. The weight matrix is initialized as a random rotation matrix. The log-determinant of an invertible 1 × 1 convolution of a $h \\times w \\times c$ tensor $h$ with $c \\times c$ weight matrix $\\mathbf{W}$ is straightforward to compute:\r\n\r\n$$ \\log | \\text{det}\\left(\\frac{d\\text{conv2D}\\left(\\mathbf{h};\\mathbf{W}\\right)}{d\\mathbf{h}}\\right) | = h \\cdot w \\cdot \\log | \\text{det}\\left(\\mathbf{W}\\right) | $$" - } - methods: { - name: "Activation Normalization" - full_name: "Activation Normalization" - description: "**Activation Normalization** is a type of normalization used for flow-based generative models; specifically it was introduced in the [GLOW](https://paperswithcode.com/method/glow) architecture. An ActNorm layer performs an affine transformation of the activations using a scale and bias parameter per channel, similar to batch normalization. These parameters are initialized such that the post-actnorm activations per-channel have zero mean and unit variance given an initial minibatch of data. 
This is a form of data dependent initilization. After initialization, the scale and bias are treated as regular trainable parameters that are independent of the data." - } - } - video: { - video_id: "6OVH1i2BVAE" - video_title: "PR-116: Glow: Generative Flow with Invertible 1x1 Convolutions" - number_of_likes: 21 - number_of_views: 2792 - published_date: { - seconds: 1541342135 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 117 - value: { - papers: { - paper_id: "peernets-exploiting-peer-wisdom-against" - title: "PeerNets: Exploiting Peer Wisdom Against Adversarial Attacks" - arxiv_id: "1806.00088" - abstract: "Deep learning systems have become ubiquitous in many aspects of our lives.\nUnfortunately, it has been shown that such systems are vulnerable to\nadversarial attacks, making them prone to potential unlawful uses. Designing\ndeep neural networks that are robust to adversarial attacks is a fundamental\nstep in making such systems safer and deployable in a broader variety of\napplications (e.g. autonomous driving), but more importantly is a necessary\nstep to design novel and more advanced architectures built on new computational\nparadigms rather than marginally building on the existing ones. In this paper\nwe introduce PeerNets, a novel family of convolutional networks alternating\nclassical Euclidean convolutions with graph convolutions to harness information\nfrom a graph of peer samples. This results in a form of non-local forward\npropagation in the model, where latent features are conditioned on the global\nstructure induced by the graph, that is up to 3 times more robust to a variety\nof white- and black-box adversarial attacks compared to conventional\narchitectures with almost no drop in accuracy." - pub_date: { - seconds: 1527724800 - } - authors: "Jan Svoboda" - authors: "Jonathan Masci" - authors: "Federico Monti" - authors: "Michael M. Bronstein" - authors: "Leonidas Guibas" - repositories: { - url: "https://github.com/tantara/PeerNets-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 14 - description: "A pytorch implementation of 'PeerNets: Exploiting Peer Wisdom Against Adversarial Attacks'" - } - } - video: { - video_id: "VQsG_Yk9KuQ" - video_title: "PR-117: PeerNets: Exploiting Peer Wisdom Against Adversarial Attacks" - number_of_likes: 6 - number_of_views: 769 - published_date: { - seconds: 1542016335 - } - uploader: "Taekmin Kim" - } - } -} -pr_id_to_video: { - key: 118 - value: { - papers: { - paper_id: "black-box-adversarial-attacks-with-limited" - title: "Black-box Adversarial Attacks with Limited Queries and Information" - arxiv_id: "1804.08598" - abstract: "Current neural network-based classifiers are susceptible to adversarial\nexamples even in the black-box setting, where the attacker only has query\naccess to the model. In practice, the threat model for real-world systems is\noften more restrictive than the typical black-box model where the adversary can\nobserve the full output of the network on arbitrarily many chosen inputs. We\ndefine three realistic threat models that more accurately characterize many\nreal-world classifiers: the query-limited setting, the partial-information\nsetting, and the label-only setting. We develop new attacks that fool\nclassifiers under these more restrictive threat models, where previous methods\nwould be impractical or ineffective. We demonstrate that our methods are\neffective against an ImageNet classifier under our proposed threat models. 
We\nalso demonstrate a targeted black-box attack against a commercial classifier,\novercoming the challenges of limited query access, partial information, and\nother practical issues to break the Google Cloud Vision API." - pub_date: { - seconds: 1524441600 - } - authors: "Andrew Ilyas" - authors: "Logan Engstrom" - authors: "Anish Athalye" - authors: "Jessy Lin" - repositories: { - is_official: true - url: "https://github.com/labsix/limited-blackbox-attacks" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 140 - description: "Code for \"Black-box Adversarial Attacks with Limited Queries and Information\" (http://arxiv.org/abs/1804.08598)" - } - repositories: { - url: "https://github.com/mllab-adv-attack/lazy-attack" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - } - video: { - video_id: "AMPpOFtg3Q4" - video_title: "PR-118: Black-Box Attacks with Limited Queries and Information" - number_of_likes: 2 - number_of_views: 419 - published_date: { - seconds: 1541943972 - } - uploader: "Jinsung Yoon" - } - } -} -pr_id_to_video: { - key: 119 - value: { - papers: { - paper_id: "active-learning-for-convolutional-neural" - title: "Active Learning for Convolutional Neural Networks: A Core-Set Approach" - arxiv_id: "1708.00489" - abstract: "Convolutional neural networks (CNNs) have been successfully applied to many\nrecognition and learning tasks using a universal recipe; training a deep model\non a very large dataset of supervised examples. However, this approach is\nrather restrictive in practice since collecting a large set of labeled images\nis very expensive. One way to ease this problem is coming up with smart ways\nfor choosing images to be labelled from a very large collection (ie. active\nlearning).\n Our empirical study suggests that many of the active learning heuristics in\nthe literature are not effective when applied to CNNs in batch setting.\nInspired by these limitations, we define the problem of active learning as\ncore-set selection, ie. choosing set of points such that a model learned over\nthe selected subset is competitive for the remaining data points. We further\npresent a theoretical result characterizing the performance of any selected\nsubset using the geometry of the datapoints. As an active learning algorithm,\nwe choose the subset which is expected to yield best result according to our\ncharacterization. Our experiments show that the proposed method significantly\noutperforms existing approaches in image classification experiments by a large\nmargin." 
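The abstract above casts active learning as core-set selection: pick points so that a model trained on them stays competitive on the rest. The Go sketch below uses the greedy k-center heuristic commonly paired with this formulation; the function names and toy feature vectors are illustrative assumptions, not the authors' implementation.

package main

import (
	"fmt"
	"math"
)

// dist is the Euclidean distance between two feature vectors.
func dist(a, b []float64) float64 {
	s := 0.0
	for i := range a {
		d := a[i] - b[i]
		s += d * d
	}
	return math.Sqrt(s)
}

// kCenterGreedy selects `budget` points, each time taking the point farthest
// from everything already labeled or selected, so the chosen set covers the
// feature space as evenly as possible.
func kCenterGreedy(features [][]float64, labeled []int, budget int) []int {
	minDist := make([]float64, len(features))
	for i := range minDist {
		minDist[i] = math.Inf(1)
		for _, j := range labeled {
			if d := dist(features[i], features[j]); d < minDist[i] {
				minDist[i] = d
			}
		}
	}
	var picked []int
	for k := 0; k < budget; k++ {
		best := 0
		for i, d := range minDist {
			if d > minDist[best] {
				best = i
			}
		}
		picked = append(picked, best)
		for i := range minDist {
			if d := dist(features[i], features[best]); d < minDist[i] {
				minDist[i] = d
			}
		}
	}
	return picked
}

func main() {
	feats := [][]float64{{0, 0}, {1, 0}, {5, 5}, {6, 5}, {0, 1}}
	// with point 0 already labeled, the far cluster is queried first
	fmt.Println(kCenterGreedy(feats, []int{0}, 2)) // [3 1]
}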
- pub_date: { - seconds: 1501545600 - } - authors: "Ozan Sener" - authors: "Silvio Savarese" - repositories: { - url: "https://github.com/razvancaramalau/Visual-Transformer-for-Task-aware-Active-Learning" - framework: FRAMEWORK_PYTORCH - number_of_stars: 8 - } - repositories: { - url: "https://github.com/razvancaramalau/Sequential-GCN-for-Active-Learning" - framework: FRAMEWORK_PYTORCH - number_of_stars: 22 - } - repositories: { - url: "https://github.com/rpinsler/active-bayesian-coresets" - framework: FRAMEWORK_PYTORCH - number_of_stars: 25 - } - methods: { - name: "Coresets" - full_name: "Coresets" - } - } - video: { - video_id: "3ROQis3hxPs" - video_title: "PR-119: Active Learning For Convolutional Neural Networks: A Core-Set Approach" - number_of_likes: 22 - number_of_views: 1885 - published_date: { - seconds: 1543402308 - } - uploader: "정지원" - } - } -} -pr_id_to_video: { - key: 120 - value: { - papers: { - paper_id: "shufflenet-v2-practical-guidelines-for" - title: "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" - arxiv_id: "1807.11164" - abstract: "Datasets, Transforms and Models specific to Computer Vision" - pub_date: { - seconds: 1532908800 - } - authors: "Ningning Ma" - authors: "Xiangyu Zhang" - authors: "Hai-Tao Zheng" - authors: "Jian Sun" - repositories: { - url: "https://github.com/pytorch/vision" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9293 - description: "Datasets, Transforms and Models specific to Computer Vision" - } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleSeg" - framework: FRAMEWORK_OTHERS - number_of_stars: 1477 - description: "End-to-end image segmentation kit based on PaddlePaddle. " - } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - framework: FRAMEWORK_OTHERS - number_of_stars: 1302 - description: "A treasure chest for visual recognition powered by PaddlePaddle" - } - repositories: { - url: "https://github.com/allen108108/Model-Optimizer_Implementation" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Training different model on MNIST datadet to see their performance" - } - repositories: { - url: "https://github.com/zjZSTU/LightWeightCNN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "轻量化卷积神经网络实现(SqueezeNet/MobileNet/ShuffleNet/MnasNet)" - } - repositories: { - url: "https://github.com/ba-san/MobilePose-Pi" - framework: FRAMEWORK_PYTORCH - number_of_stars: 15 - description: "MobilePose deployment for Raspberry Pi" - } - repositories: { - url: "https://github.com/forcefulowl/image_classification" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/allenai/dnw" - framework: FRAMEWORK_PYTORCH - number_of_stars: 132 - description: "Discovering Neural Wirings (https://arxiv.org/abs/1906.00586)" - } - repositories: { - url: "https://github.com/mnicnc404/CartoonGan-tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 649 - description: "Generate your own cartoon-style images with CartoonGAN (CVPR 2018), powered by TensorFlow 2.0 Alpha." - } - repositories: { - url: "https://github.com/xggIoU/centernet_tensorflow_wilderface_voc" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 103 - description: "This is the unofficial implementation of the \"CenterNet:Objects as Points\".Just a simple try with self-modified shufflenetv2 and yolov3.If you want better results, you need more experiments." 
- } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Channel Shuffle" - full_name: "Channel Shuffle" - description: "**Channel Shuffle** is an operation to help information flow across feature channels in convolutional neural networks. It was used as part of the [ShuffleNet](https://paperswithcode.com/method/shufflenet) architecture. \r\n\r\nIf we allow a group convolution to obtain input data from different groups, the input and output channels will be fully related. Specifically, for the feature map generated from the previous group layer, we can first divide the channels in each group into several subgroups, then feed each group in the next layer with different subgroups. \r\n\r\nThe above can be efficiently and elegantly implemented by a channel shuffle operation: suppose a convolutional layer with $g$ groups whose output has $g \\times n$ channels; we first reshape the output channel dimension into $\\left(g, n\\right)$, transposing and then flattening it back as the input of next layer. 
Channel shuffle is also differentiable, which means it can be embedded into network structures for end-to-end training." - } - methods: { - name: "Depthwise Convolution" - full_name: "Depthwise Convolution" - description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - methods: { - name: "Squeeze-and-Excitation Block" - full_name: "Squeeze-and-Excitation Block" - description: "The **Squeeze-and-Excitation Block** is an architectural unit designed to improve the representational power of a network by enabling it to perform dynamic channel-wise feature recalibration. The process is:\r\n\r\n- The block has a convolutional block as an input.\r\n- Each channel is \"squeezed\" into a single numeric value using average pooling.\r\n- A dense layer followed by a ReLU adds non-linearity and output channel complexity is reduced by a ratio.\r\n- Another dense layer followed by a sigmoid gives each channel a smooth gating function.\r\n- Finally, we weight each feature map of the convolutional block based on the side network; the \"excitation\"." - } - methods: { - name: "ShuffleNet v2" - full_name: "ShuffleNet v2" - description: "**ShuffleNet v2** is a convolutional neural network optimized for a direct metric (speed) rather than indirect metrics like FLOPs. It builds upon [ShuffleNet v1](https://paperswithcode.com/method/shufflenet), which utilised pointwise group convolutions, bottleneck-like structures, and a channel shuffle operation. 
Differences are shown in the Figure to the right, including a new channel split operation and moving the channel shuffle operation further down the block." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - } - video: { - video_id: "lrU6uXiJ_9Y" - video_title: "PR-120: ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" - number_of_likes: 34 - number_of_views: 2882 - published_date: { - seconds: 1542552935 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 121 - value: { - papers: { - paper_id: "bert-pre-training-of-deep-bidirectional" - title: "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding" - arxiv_id: "1810.04805" - abstract: "We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models, BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5% (7.7% point absolute improvement), MultiNLI accuracy to 86.7% (4.6% absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement)." - pub_date: { - seconds: 1539216000 - } - authors: "Jacob Devlin" - authors: "Ming-Wei Chang" - authors: "Kenton Lee" - authors: "Kristina Toutanova" - repositories: { - url: "https://github.com/airsplay/vimpac" - framework: FRAMEWORK_PYTORCH - number_of_stars: 15 - } - repositories: { - url: "https://github.com/han-shi/SparseBERT" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - } - repositories: { - url: "https://github.com/NoraH2004/adv-absa" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/SindhuMadi/FakeNewsDetection" - framework: FRAMEWORK_OTHERS - description: "BERT and RoBERTa" - } - repositories: { - url: "https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/transformers/mlm" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3053 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." - } - repositories: { - url: "https://github.com/JA-Bar/nlp-depression" - framework: FRAMEWORK_PYTORCH - description: "NLP course project. Tool to potentially identify signs of depression from text and audio." 
- } - repositories: { - url: "https://github.com/andi611/Mockingjay-Speech-Representation" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "Official Implementation of Mockingjay in Pytorch" - } - repositories: { - url: "https://github.com/huggingface/transformers" - framework: FRAMEWORK_PYTORCH - number_of_stars: 47573 - description: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." - } - repositories: { - url: "https://github.com/yoshitomo-matsubara/torchdistill" - framework: FRAMEWORK_PYTORCH - number_of_stars: 310 - description: "PyTorch-based modular, configuration-driven framework for knowledge distillation. 🏆18 methods presented at CVPR, ICLR, ECCV, NeurIPS, ICCV, etc are implemented so far. 🎁 Trained models, training logs and configurations are available for ensuring the reproducibiliy." - } - repositories: { - url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/language_models/bidirectional_encoder_representations_from_transformers" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "ai codebase" - } - methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. 
Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - methods: { - name: "WordPiece" - full_name: "WordPiece" - description: "**WordPiece** is a subword segmentation algorithm used in natural language processing. The vocabulary is initialized with individual characters in the language, then the most frequent combinations of symbols in the vocabulary are iteratively added to the vocabulary. The process is:\r\n\r\n1. Initialize the word unit inventory with all the characters in the text.\r\n2. Build a language model on the training data using the inventory from 1.\r\n3. Generate a new word unit by combining two units out of the current word inventory to increment the word unit inventory by one. Choose the new word unit out of all the possible ones that increases the likelihood on the training data the most when added to the model.\r\n4. 
Goto 2 until a predefined limit of word units is reached or the likelihood increase falls below a certain threshold.\r\n\r\nText: [Source](https://stackoverflow.com/questions/55382596/how-is-wordpiece-tokenization-helpful-to-effectively-deal-with-rare-words-proble/55416944#55416944)\r\n\r\nImage: WordPiece as used in BERT" - } - methods: { - name: "Attention Dropout" - full_name: "Attention Dropout" - description: "**Attention Dropout** is a type of dropout used in attention-based architectures, where elements are randomly dropped out of the softmax in the attention equation. For example, for scaled-dot product attention, we would drop elements from the first term:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}\\left(\\frac{QK^{T}}{\\sqrt{d_k}}\\right)V $$" - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "GELU" - full_name: "Gaussian Error Linear Units" - description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." - } - } - video: { - video_id: "GK4IO3qOnLc" - video_title: "PR-121: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding" - number_of_likes: 41 - number_of_views: 2830 - published_date: { - seconds: 1543981172 - } - uploader: "Young Seok Kim" - } - } -} -pr_id_to_video: { - key: 122 - value: { - papers: { - paper_id: "can-creative-adversarial-networks-generating" - title: "CAN: Creative Adversarial Networks, Generating \"Art\" by Learning About Styles and Deviating from Style Norms" - arxiv_id: "1706.07068" - abstract: "We propose a new system for generating art. 
The system generates art by\nlooking at art and learning about style; and becomes creative by increasing the\narousal potential of the generated art by deviating from the learned styles. We\nbuild over Generative Adversarial Networks (GAN), which have shown the ability\nto learn to generate novel images simulating a given distribution. We argue\nthat such networks are limited in their ability to generate creative products\nin their original design. We propose modifications to its objective to make it\ncapable of generating creative art by maximizing deviation from established\nstyles and minimizing deviation from art distribution. We conducted experiments\nto compare the response of human subjects to the generated art with their\nresponse to art created by artists. The results show that human subjects could\nnot distinguish art generated by the proposed system from art generated by\ncontemporary artists and shown in top art fairs. Human subjects even rated the\ngenerated images higher on various scales." - pub_date: { - seconds: 1498003200 - } - authors: "Ahmed Elgammal" - authors: "Bingchen Liu" - authors: "Mohamed Elhoseiny" - authors: "Marian Mazzone" - repositories: { - url: "https://github.com/otepencelik/GAN-Artwork-Generation" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - } - repositories: { - url: "https://github.com/naotokui/CreativeGAN-Rhythm" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 19 - description: "Creative Adversarial Network for generating Dance Music Rhythm Patterns" - } - repositories: { - url: "https://github.com/dylanell/conditional-wgan" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Implementation of Conditional Wasserstein Generative Adversarial Network (GAN) in PyTorch" - } - repositories: { - url: "https://github.com/mlberkeley/Creative-Adversarial-Networks" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 199 - description: "(WIP) Implementation of Creative Adversarial Networks https://arxiv.org/pdf/1706.07068.pdf" - } - repositories: { - url: "https://github.com/sfc-computational-creativity-lab/x-rhythm-can" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Creative Adversarial Network for generating Dance Music Rhythm Patterns" - } - repositories: { - url: "https://github.com/VladAleshin/pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "GAN (pet project on pytorch and flask)" - } - repositories: { - url: "https://github.com/AndreasWieg/Creative-GAN" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: " Art-GAN" - } - repositories: { - url: "https://github.com/zawlinnnaing/CAN-thesis" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - repositories: { - url: "https://github.com/casey-barr/generative-models-in-tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "a collection of generative adversarial networks implemented in TensorFlow" - } - repositories: { - url: "https://github.com/previtus/cci_exploring_machine_intelligence" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 17 - description: "UAL, CCI - MSc course: 19/20 \"IU000128 Coding Three: Exploring Machine Intelligence\"" - } - } - video: { - video_id: "TB7izZIWYyw" - video_title: "PR-122: CAN: Creative Adversarial Networks" - number_of_likes: 13 - number_of_views: 1126 - published_date: { - seconds: 1543554496 - } - uploader: "visionNoob" - } - } -} -pr_id_to_video: { - key: 
123 - value: { - papers: { - paper_id: "partial-convolution-based-padding" - title: "Partial Convolution based Padding" - arxiv_id: "1811.11718" - abstract: "In this paper, we present a simple yet effective padding scheme that can be\nused as a drop-in module for existing convolutional neural networks. We call it\npartial convolution based padding, with the intuition that the padded region\ncan be treated as holes and the original input as non-holes. Specifically,\nduring the convolution operation, the convolution results are re-weighted near\nimage borders based on the ratios between the padded area and the convolution\nsliding window area. Extensive experiments with various deep network models on\nImageNet classification and semantic segmentation demonstrate that the proposed\npadding scheme consistently outperforms standard zero padding with better\naccuracy." - pub_date: { - seconds: 1543363200 - } - authors: "Guilin Liu" - authors: "Kevin J. Shih" - authors: "Ting-Chun Wang" - authors: "Fitsum A. Reda" - authors: "Karan Sapra" - authors: "Zhiding Yu" - authors: "Andrew Tao" - authors: "Bryan Catanzaro" - repositories: { - url: "https://github.com/feixuetuba/inpating" - framework: FRAMEWORK_PYTORCH - description: "复现Image Inpainting for Irregular Holes Using Partial Convolutions" - } - repositories: { - is_official: true - url: "https://github.com/NVIDIA/partialconv" - framework: FRAMEWORK_PYTORCH - number_of_stars: 937 - description: "A New Padding Scheme: Partial Convolution based Padding" - } - repositories: { - url: "https://github.com/lessw2020/auto-adaptive-ai" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "auto adaptive framework for intrinsic hyperparameter selection, adaptive padding, normalized weights" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - } - video: { - video_id: "IKHzc7sGCxQ" - video_title: "PR-123: Partial Convolution based Padding" - number_of_likes: 50 - number_of_views: 2265 - published_date: { - seconds: 1544173387 - } - uploader: "Soyeon Kim" - } - } -} -pr_id_to_video: { - key: 124 - value: { - papers: { - paper_id: "training-stacked-denoising-autoencoders-for" - title: "Training Stacked Denoising Autoencoders for Representation Learning" - arxiv_id: "2102.08012" - abstract: "We implement stacked denoising autoencoders, a class of neural networks that are capable of learning powerful representations of high dimensional data. We describe stochastic gradient descent for unsupervised training of autoencoders, as well as a novel genetic algorithm based approach that makes use of gradient information. We analyze the performance of both optimization algorithms and also the representation learning ability of the autoencoder when it is trained on standard image classification datasets." 
- pub_date: { - seconds: 1613433600 - } - authors: "Jason Liang" - authors: "Keith Kelly" - methods: { - name: "AutoEncoder" - full_name: "AutoEncoder" - description: "An **Autoencoder** is a bottleneck architecture that turns a high-dimensional input into a latent low-dimensional code (encoder), and then performs a reconstruction of the input with this latent code (the decoder).\r\n\r\nImage: [Michael Massi](https://en.wikipedia.org/wiki/Autoencoder#/media/File:Autoencoder_schema.png)" - } - } - video: { - video_id: "saJcr74ldvs" - } - } -} -pr_id_to_video: { - key: 125 - value: { - papers: { - paper_id: "energy-based-generative-adversarial-network" - title: "Energy-based Generative Adversarial Network" - arxiv_id: "1609.03126" - abstract: "We introduce the \"Energy-based Generative Adversarial Network\" model (EBGAN)\nwhich views the discriminator as an energy function that attributes low\nenergies to the regions near the data manifold and higher energies to other\nregions. Similar to the probabilistic GANs, a generator is seen as being\ntrained to produce contrastive samples with minimal energies, while the\ndiscriminator is trained to assign high energies to these generated samples.\nViewing the discriminator as an energy function allows to use a wide variety of\narchitectures and loss functionals in addition to the usual binary classifier\nwith logistic output. Among them, we show one instantiation of EBGAN framework\nas using an auto-encoder architecture, with the energy being the reconstruction\nerror, in place of the discriminator. We show that this form of EBGAN exhibits\nmore stable behavior than regular GANs during training. We also show that a\nsingle-scale architecture can be trained to generate high-resolution images." - pub_date: { - seconds: 1473552000 - } - authors: "Junbo Zhao" - authors: "Michael Mathieu" - authors: "Yann LeCun" - repositories: { - url: "https://github.com/buriburisuri/ebgan" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 207 - description: "A tensorflow implementation of Junbo et al's Energy-based generative adversarial network ( EBGAN ) paper. " - } - repositories: { - url: "https://github.com/eriklindernoren/PyTorch-GAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9577 - description: "PyTorch implementations of Generative Adversarial Networks." - } - repositories: { - url: "https://github.com/evan11401/CS_IOC5008_0856043_HW2" - framework: FRAMEWORK_PYTORCH - description: "NCTU VRDL HW2 | Use BEGAN to create human's face" - } - } - video: { - video_id: "8PoewOpK6b4" - video_title: "PR-125: ENERGY-BASED GENERATIVE ADVERSARIAL NETWORKS" - number_of_likes: 7 - number_of_views: 707 - published_date: { - seconds: 1544368518 - } - uploader: "MinGuk Kang" - } - } -} -pr_id_to_video: { - key: 126 - value: { - papers: { - paper_id: "densepose-dense-human-pose-estimation-in-the" - title: "DensePose: Dense Human Pose Estimation In The Wild" - arxiv_id: "1802.00434" - abstract: "In this work, we establish dense correspondences between RGB image and a\nsurface-based representation of the human body, a task we refer to as dense\nhuman pose estimation. We first gather dense correspondences for 50K persons\nappearing in the COCO dataset by introducing an efficient annotation pipeline.\nWe then use our dataset to train CNN-based systems that deliver dense\ncorrespondence 'in the wild', namely in the presence of background, occlusions\nand scale variations. 
We improve our training set's effectiveness by training\nan 'inpainting' network that can fill in missing groundtruth values and report\nclear improvements with respect to the best results that would be achievable in\nthe past. We experiment with fully-convolutional networks and region-based\nmodels and observe a superiority of the latter; we further improve accuracy\nthrough cascading, obtaining a system that delivers highly accurate results in\nreal time. Supplementary materials and videos are provided on the project page\nhttp://densepose.org" - pub_date: { - seconds: 1517443200 - } - authors: "Rıza Alp Güler" - authors: "Natalia Neverova" - authors: "Iasonas Kokkinos" - repositories: { - url: "https://github.com/ubc-vision/DwNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 19 - } - repositories: { - url: "https://github.com/hz-ants/DensePose" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/yongsheng268/DensePose" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/jiajunhua/facebookresearch-DensePose" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/sgoldyaev/DeepFashion.ADGAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - } - repositories: { - url: "https://github.com/chengjiali/DensePose3" - framework: FRAMEWORK_OTHERS - description: "Adapt FB's DensePose for Python3" - } - repositories: { - url: "https://github.com/facebookresearch/DensePose" - framework: FRAMEWORK_OTHERS - number_of_stars: 6026 - description: "A real-time approach for mapping all human pixels of 2D RGB images to a 3D surface-based model of the body" - } - repositories: { - url: "https://github.com/ARMUGHAN-SHAHID/MoboDensepose" - framework: FRAMEWORK_OTHERS - description: "DEnse" - } - repositories: { - url: "https://github.com/facebookresearch/detectron" - framework: FRAMEWORK_PYTORCH - number_of_stars: 24456 - description: "FAIR's research platform for object detection research, implementing popular algorithms like Mask R-CNN and RetinaNet." - } - repositories: { - url: "https://github.com/svikramank/DensePose" - framework: FRAMEWORK_OTHERS - number_of_stars: 5 - description: "In this repo, I tried replicating the famous Facebook's DensePose R-CNN model and tried to visualize the collected DensePose-COCO dataset and show the correspondences to the SMPL model." - } - } - video: { - video_id: "-bvMCbk_FT8" - video_title: "PR-126: DensePose: Dense Human Pose Estimation In The Wild" - number_of_views: 1783 - published_date: { - seconds: 1544365241 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 127 - value: { - papers: { - paper_id: "facenet-a-unified-embedding-for-face" - title: "FaceNet: A Unified Embedding for Face Recognition and Clustering" - arxiv_id: "1503.03832" - abstract: "Despite significant recent advances in the field of face recognition,\nimplementing face verification and recognition efficiently at scale presents\nserious challenges to current approaches. In this paper we present a system,\ncalled FaceNet, that directly learns a mapping from face images to a compact\nEuclidean space where distances directly correspond to a measure of face\nsimilarity. 
Once this space has been produced, tasks such as face recognition,\nverification and clustering can be easily implemented using standard techniques\nwith FaceNet embeddings as feature vectors.\n Our method uses a deep convolutional network trained to directly optimize the\nembedding itself, rather than an intermediate bottleneck layer as in previous\ndeep learning approaches. To train, we use triplets of roughly aligned matching\n/ non-matching face patches generated using a novel online triplet mining\nmethod. The benefit of our approach is much greater representational\nefficiency: we achieve state-of-the-art face recognition performance using only\n128-bytes per face.\n On the widely used Labeled Faces in the Wild (LFW) dataset, our system\nachieves a new record accuracy of 99.63%. On YouTube Faces DB it achieves\n95.12%. Our system cuts the error rate in comparison to the best published\nresult by 30% on both datasets.\n We also introduce the concept of harmonic embeddings, and a harmonic triplet\nloss, which describe different versions of face embeddings (produced by\ndifferent networks) that are compatible to each other and allow for direct\ncomparison between each other." - pub_date: { - seconds: 1426118400 - } - authors: "Florian Schroff" - authors: "Dmitry Kalenichenko" - authors: "James Philbin" - repositories: { - url: "https://github.com/sdamolini/LooksLikeWho" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "VGGFace2 Facial Recognition using Quadruplet Loss and 4 CNNs." - } - repositories: { - url: "https://github.com/shi510/ffem" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "Face Feature Embedding Module" - } - repositories: { - url: "https://github.com/obj2vec/obj2vec" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/akshayraghavan21/Face_Recognition_Using_Facenet" - framework: FRAMEWORK_TENSORFLOW - description: "A simple face recognition implementation using a pre-trained, one-shot learning model - FaceNet. Classification on custom dataset by using the WebCam to perform live face recognition." - } - repositories: { - url: "https://github.com/tamerthamoqa/facenet-pytorch-glint360k" - framework: FRAMEWORK_PYTORCH - number_of_stars: 115 - description: "A PyTorch implementation of the 'FaceNet' paper for training a facial recognition model with Triplet Loss using the glint360k dataset. A pre-trained model using Triplet Loss is available for download." 
- } - repositories: { - url: "https://github.com/Atcold/torch-TripletEmbedding" - framework: FRAMEWORK_OTHERS - number_of_stars: 157 - description: "TripletLoss used in Google's FaceNet paper" - } - repositories: { - url: "https://github.com/serengil/deepface" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1690 - description: "A Lightweight Deep Face Recognition and Facial Attribute Analysis (Age, Gender, Emotion and Race) Framework for Python" - } - repositories: { - url: "https://github.com/PushpakBhoge/Face_Recognition_TF" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "A project to Recognise faces in photos and videos or in realtime" - } - repositories: { - url: "https://github.com/Mrzhang3389/FaceRecognition" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "人脸检测(MTCNN) + 人脸编码(FaceNet) = 人脸识别(FaceRecognition) DockerFile + Docker Image = 容器部署" - } - repositories: { - url: "https://github.com/QuocThangNguyen/deep-metric-learning-tsinghua-dogs" - framework: FRAMEWORK_PYTORCH - number_of_stars: 25 - description: "Dogs classification with Deep Metric Learning" - } - } - video: { - video_id: "0k3X-9y_9S8" - video_title: "PR-127: FaceNet" - number_of_likes: 58 - number_of_views: 4049 - published_date: { - seconds: 1544971153 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 128 - value: { - papers: { - paper_id: "timbretron-a-wavenetcyclegancqtaudio-pipeline" - title: "TimbreTron: A WaveNet(CycleGAN(CQT(Audio))) Pipeline for Musical Timbre Transfer" - arxiv_id: "1811.09620" - abstract: "In this work, we address the problem of musical timbre transfer, where the\ngoal is to manipulate the timbre of a sound sample from one instrument to match\nanother instrument while preserving other musical content, such as pitch,\nrhythm, and loudness. In principle, one could apply image-based style transfer\ntechniques to a time-frequency representation of an audio signal, but this\ndepends on having a representation that allows independent manipulation of\ntimbre as well as high-quality waveform generation. We introduce TimbreTron, a\nmethod for musical timbre transfer which applies \"image\" domain style transfer\nto a time-frequency representation of the audio signal, and then produces a\nhigh-quality waveform using a conditional WaveNet synthesizer. We show that the\nConstant Q Transform (CQT) representation is particularly well-suited to\nconvolutional architectures due to its approximate pitch equivariance. Based on\nhuman perceptual evaluations, we confirmed that TimbreTron recognizably\ntransferred the timbre while otherwise preserving the musical content, for both\nmonophonic and polyphonic samples." - pub_date: { - seconds: 1542844800 - } - authors: "Sicong Huang" - authors: "Qiyang Li" - authors: "Cem Anil" - authors: "Xuchan Bao" - authors: "Sageev Oore" - authors: "Roger B. 
Grosse" - repositories: { - url: "https://github.com/edivadiranatnom/Machine-Learning-Project" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "Machine Learning course project" - } - repositories: { - is_official: true - url: "https://github.com/huangsicong/TimbreTron" - framework: FRAMEWORK_OTHERS - number_of_stars: 37 - description: "The repo accompanying the paper: TimbreTron: A WaveNet(CycleGAN(CQT(Audio))) Pipeline for Musical Timbre Transfer" - } - methods: { - name: "WaveNet" - full_name: "WaveNet" - description: "**WaveNet** is an audio generative model based on the [PixelCNN](https://paperswithcode.com/method/pixelcnn) architecture. In order to deal with long-range temporal dependencies needed for raw audio generation, architectures are developed based on dilated causal convolutions, which exhibit very large receptive fields.\r\n\r\nThe joint probability of a waveform $\\vec{x} = \\{ x_1, \\dots, x_T \\}$ is factorised as a product of conditional probabilities as follows:\r\n\r\n$$p\\left(\\vec{x}\\right) = \\prod_{t=1}^{T} p\\left(x_t \\mid x_1, \\dots ,x_{t-1}\\right)$$\r\n\r\nEach audio sample $x_t$ is therefore conditioned on the samples at all previous timesteps." - } - methods: { - name: "Dilated Causal Convolution" - full_name: "Dilated Causal Convolution" - description: "A **Dilated Causal Convolution** is a causal convolution where the filter is applied over an area larger than its length by skipping input values with a certain step. A dilated causal convolution effectively allows the network to have very large receptive fields with just a few layers." - } - methods: { - name: "Mixture of Logistic Distributions" - full_name: "Mixture of Logistic Distributions" - description: "**Mixture of Logistic Distributions (MoL)** is a type of output function, and an alternative to a [softmax](https://paperswithcode.com/method/softmax) layer. Discretized logistic mixture likelihood is used in PixelCNN++ and [WaveNet](https://paperswithcode.com/method/wavenet) to predict discrete values.\r\n\r\nImage Credit: [Hao Gao](https://medium.com/@smallfishbigsea/an-explanation-of-discretized-logistic-mixture-likelihood-bdfe531751f0)" - } - } - video: { - video_id: "5eofa6SksKU" - video_title: "PR-128: TimbreTron: A Wavenet(CycleGAN(CQT(Audio))) pipeline for musical timbre transfer" - number_of_likes: 6 - number_of_views: 548 - published_date: { - seconds: 1544973323 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 129 - value: { - papers: { - paper_id: "horovod-fast-and-easy-distributed-deep" - title: "Horovod: fast and easy distributed deep learning in TensorFlow" - arxiv_id: "1802.05799" - abstract: "Training modern deep learning models requires large amounts of computation,\noften provided by GPUs. Scaling computation from one GPU to many can enable\nmuch faster training and research progress but entails two complications.\nFirst, the training library must support inter-GPU communication. Depending on\nthe particular methods employed, this communication may entail anywhere from\nnegligible to significant overhead. Second, the user must modify his or her\ntraining code to take advantage of inter-GPU communication. 
Depending on the\ntraining library's API, the modification required may be either significant or\nminimal.\n Existing methods for enabling multi-GPU training under the TensorFlow library\nentail non-negligible communication overhead and require users to heavily\nmodify their model-building code, leading many researchers to avoid the whole\nmess and stick with slower single-GPU training. In this paper we introduce\nHorovod, an open source library that improves on both obstructions to scaling:\nit employs efficient inter-GPU communication via ring reduction and requires\nonly a few lines of modification to user code, enabling faster, easier\ndistributed training in TensorFlow. Horovod is available under the Apache 2.0\nlicense at https://github.com/uber/horovod" - pub_date: { - seconds: 1518652800 - } - authors: "Alexander Sergeev" - authors: "Mike Del Balso" - repositories: { - url: "https://github.com/hcyang99/horovod" - framework: FRAMEWORK_TENSORFLOW - description: "Modify horovod/horovod to support nic switching" - } - repositories: { - url: "https://github.com/gridgentoo/UberHorovod" - framework: FRAMEWORK_TENSORFLOW - description: "Реверс инжиниринг архитектуры UberHorovod, Distributed training framework for TensorFlow, Keras, PyTorch, and Apache MXNet." - } - repositories: { - url: "https://github.com/teja5832/horovod-elastic" - framework: FRAMEWORK_TENSORFLOW - description: "Adding Transparent Gradient Aggregation to Horovod." - } - repositories: { - is_official: true - url: "https://github.com/uber/horovod" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 11339 - description: "Distributed training framework for TensorFlow, Keras, PyTorch, and Apache MXNet." - } - repositories: { - url: "https://github.com/horovod/horovod" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 11339 - description: "Distributed training framework for TensorFlow, Keras, PyTorch, and Apache MXNet." - } - repositories: { - url: "https://github.com/markWJJ/horovod" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/ctcyang/horovod" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Distributed training framework for TensorFlow, Keras, and PyTorch." - } - repositories: { - url: "https://github.com/karakusc/horovod" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Distributed training framework for TensorFlow, Keras, and PyTorch." - } - repositories: { - url: "https://github.com/zhonghual1206/horvodval" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "use gan for val" - } - repositories: { - url: "https://github.com/axbaretto/horovod" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Distributed training framework for TensorFlow, Keras, PyTorch, and MXNet. " - } - } - video: { - video_id: "8zQECRiONAo" - video_title: "PR-129: Horovod: fast and easy distributed deep learning in TensorFlow" - number_of_likes: 9 - number_of_views: 715 - published_date: { - seconds: 1546077765 - } - uploader: "Taekmin Kim" - } - } -} -pr_id_to_video: { - key: 130 - value: { - papers: { - paper_id: "generative-adversarial-imitation-learning" - title: "Generative Adversarial Imitation Learning" - arxiv_id: "1606.03476" - abstract: "Consider learning a policy from example expert behavior, without interaction\nwith the expert or access to reinforcement signal. 
One approach is to recover\nthe expert's cost function with inverse reinforcement learning, then extract a\npolicy from that cost function with reinforcement learning. This approach is\nindirect and can be slow. We propose a new general framework for directly\nextracting a policy from data, as if it were obtained by reinforcement learning\nfollowing inverse reinforcement learning. We show that a certain instantiation\nof our framework draws an analogy between imitation learning and generative\nadversarial networks, from which we derive a model-free imitation learning\nalgorithm that obtains significant performance gains over existing model-free\nmethods in imitating complex behaviors in large, high-dimensional environments." - pub_date: { - seconds: 1465516800 - } - authors: "Jonathan Ho" - authors: "Stefano Ermon" - repositories: { - url: "https://github.com/morikatron/GAIL_PPO" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Generative Adversarial Imitation Learning" - } - repositories: { - url: "https://github.com/HumanCompatibleAI/deep-rlsp" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 23 - description: "Code accompanying \"Learning What To Do by Simulating the Past\", ICLR 2021." - } - repositories: { - url: "https://github.com/HumanCompatibleAI/imitation" - framework: FRAMEWORK_PYTORCH - number_of_stars: 253 - description: "Clean PyTorch implementations of imitation learning algorithms" - } - repositories: { - url: "https://github.com/Khrylx/PyTorch-RL" - framework: FRAMEWORK_PYTORCH - number_of_stars: 699 - description: "PyTorch implementation of Deep Reinforcement Learning: Policy Gradient methods (TRPO, PPO, A2C) and Generative Adversarial Imitation Learning (GAIL). Fast Fisher vector product TRPO." - } - repositories: { - url: "https://github.com/sisl/ngsim_env" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 99 - description: "Learning human driver models from NGSIM data with imitation learning." - } - repositories: { - url: "https://github.com/170928/-Review-Generative-Adversarial-Imitation-Learning" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "[Review & Code]" - } - repositories: { - url: "https://github.com/bukysun/gail-traj-eb" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/nav74neet/gail-tf-gym" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 55 - description: "Implementation of Generatve Adversarial Imitation Learning (GAIL) for classic environments from OpenAI Gym. " - } - repositories: { - url: "https://github.com/hill-a/stable-baselines" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3173 - description: "A fork of OpenAI Baselines, implementations of reinforcement learning algorithms" - } - repositories: { - url: "https://github.com/KAIST-AILab/deeprl_practice_colab" - framework: FRAMEWORK_OTHERS - number_of_stars: 4 - description: "Preparation for Deep Reinforcement Learning using Google Colab" - } - methods: { - name: "GAIL" - full_name: "Generative Adversarial Imitation Learning" - description: "**Generative Adversarial Imitation Learning** presents a new general framework for directly extracting a policy from data, as if it were obtained by reinforcement learning following inverse reinforcement learning." 
- } - } - video: { - video_id: "XHmRsgFrCTM" - video_title: "PR-130: Generative Adversarial Imitation Learning" - number_of_likes: 14 - number_of_views: 2558 - published_date: { - seconds: 1545573404 - } - uploader: "Jinsung Yoon" - } - } -} -pr_id_to_video: { - key: 131 - value: { - papers: { - paper_id: "a-style-based-generator-architecture-for" - title: "A Style-Based Generator Architecture for Generative Adversarial Networks" - arxiv_id: "1812.04948" - abstract: "We propose an alternative generator architecture for generative adversarial\nnetworks, borrowing from style transfer literature. The new architecture leads\nto an automatically learned, unsupervised separation of high-level attributes\n(e.g., pose and identity when trained on human faces) and stochastic variation\nin the generated images (e.g., freckles, hair), and it enables intuitive,\nscale-specific control of the synthesis. The new generator improves the\nstate-of-the-art in terms of traditional distribution quality metrics, leads to\ndemonstrably better interpolation properties, and also better disentangles the\nlatent factors of variation. To quantify interpolation quality and\ndisentanglement, we propose two new, automated methods that are applicable to\nany generator architecture. Finally, we introduce a new, highly varied and\nhigh-quality dataset of human faces." - pub_date: { - seconds: 1544572800 - } - authors: "Tero Karras" - authors: "Samuli Laine" - authors: "Timo Aila" - repositories: { - url: "https://github.com/comp-imaging-sci/pic-recon" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "Code associated with the paper \"Prior Image-Constrained Reconstruction using Style-Based Generative Models\" accepted to ICML 2021." - } - repositories: { - url: "https://github.com/toshas/torch-fidelity" - framework: FRAMEWORK_PYTORCH - number_of_stars: 247 - description: "High-fidelity performance metrics for generative models in PyTorch" - } - repositories: { - url: "https://github.com/roberttwomey/machine-imagination-workshop" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "text to image notebook with CLIP for workshop on Machine Imagination, Spring 2021" - } - repositories: { - url: "https://github.com/ariel415el/SimplePytorch-ALAE" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Implementation of Adverserial autoencoders" - } - repositories: { - url: "https://github.com/jhKessler/Progressively-Growing-Generative-Adverserial-Network" - framework: FRAMEWORK_PYTORCH - description: "Generative Adverserial Network for Image Generation" - } - repositories: { - url: "https://github.com/genforce/interfacegan" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 950 - description: "[CVPR 2020] Interpreting the Latent Space of GANs for Semantic Face Editing" - } - repositories: { - url: "https://github.com/a514514772/hijackgan" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "[CVPR 2021] Pytorch implementation of Hijack-GAN: Unintended-Use of Pretrained, Black-Box GANs" - } - repositories: { - url: "https://github.com/yaxingwang/DeepI2I" - framework: FRAMEWORK_PYTORCH - number_of_stars: 18 - description: "Image-to-image translation, knowledge transfer" - } - repositories: { - url: "https://github.com/ariel415el/ALAE" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Implementation of Adverserial autoencoders" - } - repositories: { - url: 
"https://github.com/ayushgupta9198/stylegan" - framework: FRAMEWORK_TENSORFLOW - description: "The model is based for fake person creation based on stylegan technique. I have trained the model on my data set and generates the result basis of grids and seeds" - } - methods: { - name: "Feedforward Network" - full_name: "Feedforward Network" - description: "A **Feedforward Network**, or a **Multilayer Perceptron (MLP)**, is a neural network with solely densely connected layers. This is the classic neural network architecture of the literature. It consists of inputs $x$ passed through units $h$ (of which there can be many layers) to predict a target $y$. Activation functions are generally chosen to be non-linear to allow for flexible functional approximation.\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "Leaky ReLU" - full_name: "Leaky ReLU" - description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we we may suffer from sparse gradients, for example training generative adversarial networks." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Adaptive Instance Normalization" - full_name: "Adaptive Instance Normalization" - description: "**Adaptive Instance Normalization** is a normalization method that aligns the mean and variance of the content features with those of the style features. \r\n\r\n[Instance Normalization](https://paperswithcode.com/method/instance-normalization) normalizes the input to a single style specified by the affine parameters. Adaptive Instance Normaliation is an extension. In AdaIN, we receive a content input $x$ and a style input $y$, and we simply align the channel-wise mean and variance of $x$ to match those of $y$. Unlike [Batch Normalization](https://paperswithcode.com/method/batch-normalization), Instance Normalization or Conditional Instance Normalization, AdaIN has no learnable affine parameters. 
Instead, it adaptively computes the affine parameters from the style input:\r\n\r\n$$\r\n\\textrm{AdaIN}(x, y)= \\sigma(y)\\left(\\frac{x-\\mu(x)}{\\sigma(x)}\\right)+\\mu(y)\r\n$$" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "WGAN-GP Loss" - full_name: "WGAN-GP Loss" - description: "**Wasserstein Gradient Penalty Loss**, or **WGAN-GP Loss**, is a loss used for generative adversarial networks that augments the Wasserstein loss with a gradient norm penalty for random samples $\\mathbf{\\hat{x}} \\sim \\mathbb{P}\\_{\\hat{\\mathbf{x}}}$ to achieve Lipschitz continuity:\r\n\r\n$$ L = \\mathbb{E}\\_{\\mathbf{\\hat{x}} \\sim \\mathbb{P}\\_{g}}\\left[D\\left(\\tilde{\\mathbf{x}}\\right)\\right] - \\mathbb{E}\\_{\\mathbf{x} \\sim \\mathbb{P}\\_{r}}\\left[D\\left(\\mathbf{x}\\right)\\right] + \\lambda\\mathbb{E}\\_{\\mathbf{\\hat{x}} \\sim \\mathbb{P}\\_{\\hat{\\mathbf{x}}}}\\left[\\left(||\\nabla\\_{\\tilde{\\mathbf{x}}}D\\left(\\mathbf{\\tilde{x}}\\right)||\\_{2}-1\\right)^{2}\\right]$$\r\n\r\nIt was introduced as part of the [WGAN-GP](https://paperswithcode.com/method/wgan-gp) overall model." - } - methods: { - name: "R1 Regularization" - full_name: "R1 Regularization" - description: "**R$\\_{1}$ Regularization** is a regularization technique and gradient penalty for training [generative adversarial networks](https://paperswithcode.com/methods/category/generative-adversarial-networks). It penalizes the discriminator from deviating from the Nash Equilibrium via penalizing the gradient on real data alone: when the generator distribution produces the true data distribution and the discriminator is equal to 0 on the data manifold, the gradient penalty ensures that the discriminator cannot create a non-zero gradient orthogonal to the data manifold without suffering a loss in the GAN game.\r\n\r\nThis leads to the following regularization term:\r\n\r\n$$ R\\_{1}\\left(\\psi\\right) = \\frac{\\gamma}{2}E\\_{p\\_{D}\\left(x\\right)}\\left[||\\nabla{D\\_{\\psi}\\left(x\\right)}||^{2}\\right] $$" - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. 
\r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "StyleGAN" - full_name: "StyleGAN" - description: "**StyleGAN** is a type of generative adversarial network. It uses an alternative generator architecture for generative adversarial networks, borrowing from style transfer literature; in particular, the use of adaptive instance normalization. Otherwise it follows Progressive GAN in using a progressively growing training regime. Other quirks include the fact it generates from a fixed value tensor not stochastically generated latent variables as in regular GANs. The stochastically generated latent variables are used as style vectors in the adaptive instance normalization at each resolution after being transformed by an 8-layer feedforward network. Lastly, it employs a form of regularization called mixing regularization, which mixes two style latent variables during training." - } - } - video: { - video_id: "TWzEbMrH59o" - video_title: "PR-131: A Style-Based Generator Architecture for Generative Adversarial Networks" - number_of_likes: 64 - number_of_views: 3881 - published_date: { - seconds: 1546903803 - } - uploader: "정지원" - } - } -} -pr_id_to_video: { - key: 132 - value: { - papers: { - paper_id: "ssd-single-shot-multibox-detector" - title: "SSD: Single Shot MultiBox Detector" - arxiv_id: "1512.02325" - abstract: "We present a method for detecting objects in images using a single deep\nneural network. Our approach, named SSD, discretizes the output space of\nbounding boxes into a set of default boxes over different aspect ratios and\nscales per feature map location. At prediction time, the network generates\nscores for the presence of each object category in each default box and\nproduces adjustments to the box to better match the object shape. Additionally,\nthe network combines predictions from multiple feature maps with different\nresolutions to naturally handle objects of various sizes. Our SSD model is\nsimple relative to methods that require object proposals because it completely\neliminates proposal generation and subsequent pixel or feature resampling stage\nand encapsulates all computation in a single network. This makes SSD easy to\ntrain and straightforward to integrate into systems that require a detection\ncomponent. Experimental results on the PASCAL VOC, MS COCO, and ILSVRC datasets\nconfirm that SSD has comparable accuracy to methods that utilize an additional\nobject proposal step and is much faster, while providing a unified framework\nfor both training and inference. Compared to other single stage methods, SSD\nhas much better accuracy, even with a smaller input image size. For $300\\times\n300$ input, SSD achieves 72.1% mAP on VOC2007 test at 58 FPS on a Nvidia Titan\nX and for $500\\times 500$ input, SSD achieves 75.1% mAP, outperforming a\ncomparable state of the art Faster R-CNN model. 
Code is available at\nhttps://github.com/weiliu89/caffe/tree/ssd ." - pub_date: { - seconds: 1449532800 - } - authors: "Wei Liu" - authors: "Dragomir Anguelov" - authors: "Dumitru Erhan" - authors: "Christian Szegedy" - authors: "Scott Reed" - authors: "Cheng-Yang Fu" - authors: "Alexander C. Berg" - repositories: { - url: "https://github.com/huytranvan2010/SSD" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - repositories: { - url: "https://github.com/stevensmiley1989/MrRobot" - framework: FRAMEWORK_TENSORFLOW - description: "This is a robot I designed in Fusion 360 and 3D printed with my FlashForge Creator Pro in PLA, Main Hardware: 1 x Raspberry Pi 3b, 3 x Arduinos with I2C, 5 x ultrasonic sensors, 4 x 60Kg Servos, 4 x 12V 200rpm DC motors, 1 x stepper motor for loading ammo into custom built coil gun. The coil gun uses 2 x 450V 1000uF Capacitors in parallel with a boost converter, yielding 380V maximum charge discharge from a 12V input, firing with a 1.2kV maximum peak non-repetitive surge current 1.1kA rated Thyristor SCR, Main Software: Uses TensorFlow and Python for Object Detection with some C++ for motor controls. The model used is a retrained Single Shot Detection MobileNet V2 algorithm trained on a toy reindeer. Signal processing allows proportional controller feedback to adjust movement of the robot for moving, aiming, and shooting. An application for IOS was written in Swift to control the robot as well, using Mosquito MQTT Broker for communication. " - } - repositories: { - url: "https://github.com/birosjh/pytorch_ssd" - framework: FRAMEWORK_PYTORCH - description: "A project for me to play around and experiment with the different components of the Single Shot Multibox Detector." - } - repositories: { - url: "https://github.com/Chubbyman2/SSD_MobileNet_Hand_Tracker" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "A hand tracker created using OpenCV and a re-trained SSD MobileNet v2 via transfer learning on the EgoHands Dataset." - } - repositories: { - url: "https://github.com/serengil/deepface" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1690 - description: "A Lightweight Deep Face Recognition and Facial Attribute Analysis (Age, Gender, Emotion and Race) Framework for Python" - } - repositories: { - url: "https://github.com/AmirDavoodi/Hand-Gestures-Human-Robot-Interaction" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "This project is the final project of the course Robotics 2019 and we are implementing hand gesture classifier to using it for controlling Mighty Thymio robot which is a differential robot." 
- } - repositories: { - url: "https://github.com/bleedingfight/caffe-env" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/KostadinovShalon/UAVDetectionTrackingBenchmark" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - } - repositories: { - url: "https://github.com/jaykshirsagar05/CrowdCounting" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/ashwath007/amenity-detection" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "aminity-detection" - } - methods: { - name: "SGD with Momentum" - full_name: "SGD with Momentum" - description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" - } - methods: { - name: "SSD" - full_name: "SSD" - description: "**SSD** is a single-stage object detection method that discretizes the output space of bounding boxes into a set of default boxes over different aspect ratios and scales per feature map location. At prediction time, the network generates scores for the presence of each object category in each default box and produces adjustments to the box to better match the object shape. Additionally, the network combines predictions from multiple feature maps with different resolutions to naturally handle objects of various sizes. \r\n\r\nThe fundamental improvement in speed comes from eliminating bounding box proposals and the subsequent pixel or feature resampling stage. Improvements over competing single-stage methods include using a small convolutional filter to predict object categories and offsets in bounding box locations, using separate predictors (filters) for different aspect ratio detections, and applying these filters to multiple feature maps from the later stages of a network in order to perform detection at multiple scales." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Non Maximum Suppression" - full_name: "Non Maximum Suppression" - description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criteria is usually discarding entities that are below a given probability bound. 
With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where a $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "VGG" - full_name: "VGG" - description: "**VGG** is a classical convolutional neural network architecture. It was based on an analysis of how to increase the depth of such networks. The network utilises small 3 x 3 filters. Otherwise the network is characterized by its simplicity: the only other components being pooling layers and a fully connected layer.\r\n\r\nImage: [Davi Frossard](https://www.cs.toronto.edu/frossard/post/vgg16/)" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. 
Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - } - video: { - video_id: "ej1ISEoAK5g" - video_title: "PR-132: SSD: Single Shot MultiBox Detector" - number_of_likes: 118 - number_of_views: 10398 - published_date: { - seconds: 1546786878 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 133 - value: { - papers: { - paper_id: "accurate-large-minibatch-sgd-training" - title: "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour" - arxiv_id: "1706.02677" - abstract: "Deep learning thrives with large neural networks and large datasets. However,\nlarger networks and larger datasets result in longer training times that impede\nresearch and development progress. Distributed synchronous SGD offers a\npotential solution to this problem by dividing SGD minibatches over a pool of\nparallel workers. Yet to make this scheme efficient, the per-worker workload\nmust be large, which implies nontrivial growth in the SGD minibatch size. In\nthis paper, we empirically show that on the ImageNet dataset large minibatches\ncause optimization difficulties, but when these are addressed the trained\nnetworks exhibit good generalization. Specifically, we show no loss of accuracy\nwhen training with large minibatch sizes up to 8192 images. To achieve this\nresult, we adopt a hyper-parameter-free linear scaling rule for adjusting\nlearning rates as a function of minibatch size and develop a new warmup scheme\nthat overcomes optimization challenges early in training. With these simple\ntechniques, our Caffe2-based system trains ResNet-50 with a minibatch size of\n8192 on 256 GPUs in one hour, while matching small minibatch accuracy. Using\ncommodity hardware, our implementation achieves ~90% scaling efficiency when\nmoving from 8 to 256 GPUs. Our findings enable training visual recognition\nmodels on internet-scale data with high efficiency." - pub_date: { - seconds: 1496880000 - } - authors: "Priya Goyal" - authors: "Piotr Dollár" - authors: "Ross Girshick" - authors: "Pieter Noordhuis" - authors: "Lukasz Wesolowski" - authors: "Aapo Kyrola" - authors: "Andrew Tulloch" - authors: "Yangqing Jia" - authors: "Kaiming He" - repositories: { - url: "https://github.com/luminxu/ViPNAS" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "The official repo for CVPR2021——ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search." 
- } - repositories: { - url: "https://github.com/nerminsamet/HPRNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 19 - } - repositories: { - url: "https://github.com/IVRL/FG-NIC" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Fidelity-Guided Noisy Image Classification" - } - repositories: { - url: "https://github.com/vycezhong/byteps-compress" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/YeLyuUT/VOSDetectron" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Combination of Mask RCNN with ConvGRU for video object segmentation" - } - repositories: { - url: "https://github.com/MarcAntoineAlex/darts" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/kikacaty/adv_guide" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/HRNet/Lite-HRNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 340 - description: "This is an official pytorch implementation of Lite-HRNet: A Lightweight High-Resolution Network. " - } - repositories: { - url: "https://github.com/serend1p1ty/SeqNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 113 - description: "Code for AAAI 2021 paper: Sequential End-to-end Network for Efficient Person Search" - } - repositories: { - url: "https://github.com/ericyang789/Parallel-Compute-Project" - framework: FRAMEWORK_OTHERS - description: "C implementation of t-SNE with parallelization optimization" - } - methods: { - name: "SGD" - full_name: "Stochastic Gradient Descent" - description: "**Stochastic Gradient Descent** is an iterative optimization technique that uses minibatches of data to form an expectation of the gradient, rather than the full gradient using all available data. That is for weights $w$ and a loss function $L$ we have:\r\n\r\n$$ w\\_{t+1} = w\\_{t} - \\eta\\hat{\\nabla}\\_{w}{L(w\\_{t})} $$\r\n\r\nWhere $\\eta$ is a learning rate. SGD reduces redundancy compared to batch gradient descent - which recomputes gradients for similar examples before each parameter update - so it is usually much faster.\r\n\r\n(Image Source: [here](http://rasbt.github.io/mlxtend/user_guide/general_concepts/gradient-optimization/))" - } - } - video: { - video_id: "g3McZgloCJo" - video_title: "PR-133: Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour" - number_of_likes: 9 - number_of_views: 652 - published_date: { - seconds: 1547454308 - } - uploader: "Young Seok Kim" - } - } -} -pr_id_to_video: { - key: 134 - value: { - papers: { - paper_id: "how-does-batch-normalization-help" - title: "How Does Batch Normalization Help Optimization?" - arxiv_id: "1805.11604" - abstract: "Batch Normalization (BatchNorm) is a widely adopted technique that enables\nfaster and more stable training of deep neural networks (DNNs). Despite its\npervasiveness, the exact reasons for BatchNorm's effectiveness are still poorly\nunderstood. The popular belief is that this effectiveness stems from\ncontrolling the change of the layers' input distributions during training to\nreduce the so-called \"internal covariate shift\". In this work, we demonstrate\nthat such distributional stability of layer inputs has little to do with the\nsuccess of BatchNorm. Instead, we uncover a more fundamental impact of\nBatchNorm on the training process: it makes the optimization landscape\nsignificantly smoother. 
This smoothness induces a more predictive and stable\nbehavior of the gradients, allowing for faster training." - pub_date: { - seconds: 1527552000 - } - authors: "Shibani Santurkar" - authors: "Dimitris Tsipras" - authors: "Andrew Ilyas" - authors: "Aleksander Madry" - repositories: { - url: "https://github.com/yaoshiang/MobileNetV2-CIFAR-Cleverhans" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/AchintyaX/Brain_tumor_segmentation" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - repositories: { - url: "https://github.com/utsawk/CarND-Traffic-Sign-Classifier-Project" - framework: FRAMEWORK_OTHERS - description: "Udacity CarND Traffic Sign Classifier Project" - } - repositories: { - url: "https://github.com/jadevaibhav/Brain-Tumor-Segmentation-using-Deep-Neural-networks" - framework: FRAMEWORK_OTHERS - number_of_stars: 79 - description: "Keras implementation of paper by the same name" - } - repositories: { - url: "https://github.com/peteraugustine/seg3" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/ajinas-ibrahim/brain_tumor" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/tobinthankachan1/exam1" - framework: FRAMEWORK_OTHERS - } - } - video: { - video_id: "hiN0IMM50FM" - video_title: "PR-134 How Does Batch Normalization Help Optimization?" - number_of_likes: 14 - number_of_views: 1001 - published_date: { - seconds: 1548117640 - } - uploader: "visionNoob" - } - } -} -pr_id_to_video: { - key: 135 - value: { - papers: { - paper_id: "photo-wake-up-3d-character-animation-from-a" - title: "Photo Wake-Up: 3D Character Animation from a Single Photo" - arxiv_id: "1812.02246" - abstract: "We present a method and application for animating a human subject from a\nsingle photo. E.g., the character can walk out, run, sit, or jump in 3D. The\nkey contributions of this paper are: 1) an application of viewing and animating\nhumans in single photos in 3D, 2) a novel 2D warping method to deform a posable\ntemplate body model to fit the person's complex silhouette to create an\nanimatable mesh, and 3) a method for handling partial self occlusions. We\ncompare to state-of-the-art related methods and evaluate results with human\nstudies. Further, we present an interactive interface that allows re-posing the\nperson in 3D, and an augmented reality setup where the animated 3D person can\nemerge from the photo into the real world. We demonstrate the method on photos,\nposters, and art." - pub_date: { - seconds: 1543968000 - } - authors: "Chung-Yi Weng" - authors: "Brian Curless" - authors: "Ira Kemelmacher-Shlizerman" - } - video: { - video_id: "LSlBoNNbULg" - video_title: "PR-135: Photo Wake-Up: 3D Character Animation from a Single Photo" - number_of_likes: 55 - number_of_views: 2976 - published_date: { - seconds: 1548003936 - } - uploader: "Soyeon Kim" - } - } -} -pr_id_to_video: { - key: 136 - value: { - papers: { - paper_id: "self-supervised-generative-adversarial" - title: "Self-Supervised GANs via Auxiliary Rotation Loss" - arxiv_id: "1811.11212" - abstract: "Conditional GANs are at the forefront of natural image synthesis. The main\ndrawback of such models is the necessity for labeled data. In this work we\nexploit two popular unsupervised learning techniques, adversarial training and\nself-supervision, and take a step towards bridging the gap between conditional\nand unconditional GANs. 
In particular, we allow the networks to collaborate on\nthe task of representation learning, while being adversarial with respect to\nthe classic GAN game. The role of self-supervision is to encourage the\ndiscriminator to learn meaningful feature representations which are not\nforgotten during training. We test empirically both the quality of the learned\nimage representations, and the quality of the synthesized images. Under the\nsame conditions, the self-supervised GAN attains a similar performance to\nstate-of-the-art conditional counterparts. Finally, we show that this approach\nto fully unsupervised learning can be scaled to attain an FID of 23.4 on\nunconditional ImageNet generation." - pub_date: { - seconds: 1543276800 - } - authors: "Ting Chen" - authors: "Xiaohua Zhai" - authors: "Marvin Ritter" - authors: "Mario Lucic" - authors: "Neil Houlsby" - repositories: { - is_official: true - url: "https://github.com/google/compare_gan" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1731 - description: "Compare GAN code." - } - repositories: { - url: "https://github.com/zhangqianhui/Self-Supervised-GANs" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 30 - description: "Tensorflow Implementation for paper \"self-supervised generative adversarial networks\"" - } - repositories: { - url: "https://github.com/vandit15/Self-Supervised-Gans-Pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 50 - description: "Ready to train Pytorch implementation of the CVPR'19 paper \"Self-Supervised GANs via Auxiliary Rotation Loss\"" - } - methods: { - name: "GAN" - full_name: "Generative Adversarial Network" - description: "A **GAN**, or **Generative Adversarial Network**, is a generative model that simultaneously trains\r\ntwo models: a generative model $G$ that captures the data distribution, and a discriminative model $D$ that estimates the\r\nprobability that a sample came from the training data rather than $G$.\r\n\r\nThe training procedure for $G$ is to maximize the probability of $D$ making\r\na mistake. This framework corresponds to a minimax two-player game. In the\r\nspace of arbitrary functions $G$ and $D$, a unique solution exists, with $G$\r\nrecovering the training data distribution and $D$ equal to $\\frac{1}{2}$\r\neverywhere. In the case where $G$ and $D$ are defined by multilayer perceptrons,\r\nthe entire system can be trained with backpropagation. 
\r\n\r\n(Image Source: [here](http://www.kdnuggets.com/2017/01/generative-adversarial-networks-hot-topic-machine-learning.html))" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - } - video: { - video_id: "_wpDP-6afM4" - video_title: "PR-136 Self-Supervised Generative Adversarial Networks" - number_of_likes: 23 - number_of_views: 1085 - published_date: { - seconds: 1547995361 - } - uploader: "강민국" - } - } -} -pr_id_to_video: { - key: 137 - value: { - papers: { - paper_id: "mine-mutual-information-neural-estimation" - title: "MINE: Mutual Information Neural Estimation" - arxiv_id: "1801.04062" - abstract: "We argue that the estimation of mutual information between high dimensional\ncontinuous random variables can be achieved by gradient descent over neural\nnetworks. We present a Mutual Information Neural Estimator (MINE) that is\nlinearly scalable in dimensionality as well as in sample size, trainable\nthrough back-prop, and strongly consistent. We present a handful of\napplications on which MINE can be used to minimize or maximize mutual\ninformation. We apply MINE to improve adversarially trained generative models.\nWe also use MINE to implement Information Bottleneck, applying it to supervised\nclassification; our results demonstrate substantial improvement in flexibility\nand performance in these settings." - pub_date: { - seconds: 1515715200 - } - authors: "Mohamed Ishmael Belghazi" - authors: "Aristide Baratin" - authors: "Sai Rajeswar" - authors: "Sherjil Ozair" - authors: "Yoshua Bengio" - authors: "Aaron Courville" - authors: "R Devon Hjelm" - repositories: { - url: "https://github.com/ahujak/KKLE" - framework: FRAMEWORK_OTHERS - description: "Estimating KL Divergence" - } - repositories: { - url: "https://github.com/sambklein/MINE_demo" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/gtegner/hyper-gan" - framework: FRAMEWORK_PYTORCH - description: "Uncertainty Estimation with HyperGANS in PyTorch!" - } - repositories: { - url: "https://github.com/MasanoriYamada/Mine_pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 156 - description: "MINE: Mutual Information Neural Estimation in pytorch (unofficial)" - } - repositories: { - url: "https://github.com/mzgubic/MINE" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 37 - description: "Mutual Information Neural Estimator implemented in Tensorflow" - } - repositories: { - url: "https://github.com/csliuwei/Emotion_MI" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/Avi-avidan/MINE" - framework: FRAMEWORK_PYTORCH - description: "Multi Information Neural Encoder " - } - repositories: { - url: "https://github.com/dizcza/EmbedderSDR" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Embedder with binary sparse distributed representation." 
- } - repositories: { - url: "https://github.com/shannonycj/simple-mine" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "A tensorflow implementation of Mutual Information Nerual Estimation" - } - repositories: { - url: "https://github.com/ChengzhangZhu/MINE" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 14 - description: "Keras implementation (only for tensorflow backend) of MINE: Mutual Information Neural Estimation" - } - } - video: {} - } -} -pr_id_to_video: { - key: 138 - value: { - papers: { - paper_id: "overcoming-limitations-of-mixture-density-1" - title: "Overcoming Limitations of Mixture Density Networks: A Sampling and Fitting Framework for Multimodal Future Prediction" - arxiv_id: "1906.03631" - abstract: "Future prediction is a fundamental principle of intelligence that helps plan actions and avoid possible dangers. As the future is uncertain to a large extent, modeling the uncertainty and multimodality of the future states is of great relevance. Existing approaches are rather limited in this regard and mostly yield a single hypothesis of the future or, at the best, strongly constrained mixture components that suffer from instabilities in training and mode collapse. In this work, we present an approach that involves the prediction of several samples of the future with a winner-takes-all loss and iterative grouping of samples to multiple modes. Moreover, we discuss how to evaluate predicted multimodal distributions, including the common real scenario, where only a single sample from the ground-truth distribution is available for evaluation. We show on synthetic and real data that the proposed approach triggers good estimates of multimodal distributions and avoids mode collapse. Source code is available at $\\href{https://github.com/lmb-freiburg/Multimodal-Future-Prediction}{\\text{this https URL.}}$" - pub_date: { - seconds: 1560038400 - } - authors: "Osama Makansi" - authors: "Eddy Ilg" - authors: "Özgün Cicek" - authors: "Thomas Brox" - repositories: { - is_official: true - url: "https://github.com/lmb-freiburg/Multimodal-Future-Prediction" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 27 - description: "The official repository for the CVPR 2019 paper \"Overcoming Limitations of Mixture Density Networks: A Sampling and Fitting Framework for Multimodal Future Prediction\"" - } - } - video: { - video_id: "VORJQQUphuw" - video_title: "PR-138: Mixture Density Network" - number_of_views: 2387 - published_date: { - seconds: 1548599784 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 139 - value: { - papers: { - paper_id: "fully-convolutional-siamese-networks-for-1" - title: "Fully-Convolutional Siamese Networks for Object Tracking" - arxiv_id: "1606.09549" - abstract: "The problem of arbitrary object tracking has traditionally been tackled by\nlearning a model of the object's appearance exclusively online, using as sole\ntraining data the video itself. Despite the success of these methods, their\nonline-only approach inherently limits the richness of the model they can\nlearn. Recently, several attempts have been made to exploit the expressive\npower of deep convolutional networks. However, when the object to track is not\nknown beforehand, it is necessary to perform Stochastic Gradient Descent online\nto adapt the weights of the network, severely compromising the speed of the\nsystem. 
In this paper we equip a basic tracking algorithm with a novel\nfully-convolutional Siamese network trained end-to-end on the ILSVRC15 dataset\nfor object detection in video. Our tracker operates at frame-rates beyond\nreal-time and, despite its extreme simplicity, achieves state-of-the-art\nperformance in multiple benchmarks." - pub_date: { - seconds: 1467244800 - } - authors: "Luca Bertinetto" - authors: "Jack Valmadre" - authors: "João F. Henriques" - authors: "Andrea Vedaldi" - authors: "Philip H. S. Torr" - repositories: { - url: "https://github.com/logiklesuraj/siamfcex" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/logiklesuraj/SiamFC" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/suraj-maniyar/Object-Tracking-SSD300" - framework: FRAMEWORK_PYTORCH - description: "Object tracking using SSD" - } - repositories: { - url: "https://github.com/zllrunning/SiameseX.PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 340 - description: "A simplified PyTorch implementation of Siamese networks for tracking: SiamFC, SiamRPN, SiamRPN++, SiamVGG, SiamDW, SiamRPN-VGG." - } - repositories: { - url: "https://github.com/shallowtoil/DROL" - framework: FRAMEWORK_PYTORCH - number_of_stars: 58 - description: "Discriminative and Robust Online Learning for Siamese Visual Tracking [AAAI2020]" - } - methods: { - name: "Siamese Network" - full_name: "Siamese Network" - description: "A **Siamese Network** consists of twin networks which accept distinct inputs but are joined by an energy function at the top. This function computes a metric between the highest level feature representation on each side. The parameters between the twin networks are tied. Weight tying guarantees that two extremely similar images are not mapped by each network to very different locations in feature space because each network computes the same function. The network is symmetric, so that whenever we present two distinct images to the twin networks, the top conjoining layer will compute the same metric as if we were to we present the same two images but to the opposite twins.\r\n\r\nIntuitively instead of trying to classify inputs, a siamese network learns to differentiate between inputs, learning their similarity. The loss function used is usually a form of contrastive loss.\r\n\r\nSource: [Koch et al](https://www.cs.cmu.edu/~rsalakhu/papers/oneshot1.pdf)" - } - } - video: { - video_id: "dv5yUl6Lw1g" - video_title: "PR-139: Fully Convolutional Siamese Networks for Object Tracking" - number_of_likes: 38 - number_of_views: 2849 - published_date: { - seconds: 1549845265 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 140 - value: { - papers: { - paper_id: "training-set-debugging-using-trusted-items" - title: "Training Set Debugging Using Trusted Items" - arxiv_id: "1801.08019" - abstract: "Training set bugs are flaws in the data that adversely affect machine\nlearning. The training set is usually too large for man- ual inspection, but\none may have the resources to verify a few trusted items. The set of trusted\nitems may not by itself be adequate for learning, so we propose an algorithm\nthat uses these items to identify bugs in the training set and thus im- proves\nlearning. Specifically, our approach seeks the smallest set of changes to the\ntraining set labels such that the model learned from this corrected training\nset predicts labels of the trusted items correctly. 
We flag the items whose\nlabels are changed as potential bugs, whose labels can be checked for veracity\nby human experts. To find the bugs in this way is a challenging combinatorial\nbilevel optimization problem, but it can be relaxed into a continuous\noptimization problem. Ex- periments on toy and real data demonstrate that our\napproach can identify training set bugs effectively and suggest appro- priate\nchanges to the labels. Our algorithm is a step toward trustworthy machine\nlearning." - pub_date: { - seconds: 1516752000 - } - authors: "Xuezhou Zhang" - authors: "Xiaojin Zhu" - authors: "Stephen J. Wright" - } - video: { - video_id: "_2l2UFIF08Q" - video_title: "PR-140: Training Set Debugging Using Trusted Items" - number_of_likes: 5 - number_of_views: 613 - published_date: { - seconds: 1549810486 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 141 - value: { - papers: { - paper_id: "auto-deeplab-hierarchical-neural-architecture" - title: "Auto-DeepLab: Hierarchical Neural Architecture Search for Semantic Image Segmentation" - arxiv_id: "1901.02985" - abstract: "Recently, Neural Architecture Search (NAS) has successfully identified neural\nnetwork architectures that exceed human designed ones on large-scale image\nclassification. In this paper, we study NAS for semantic image segmentation.\nExisting works often focus on searching the repeatable cell structure, while\nhand-designing the outer network structure that controls the spatial resolution\nchanges. This choice simplifies the search space, but becomes increasingly\nproblematic for dense image prediction which exhibits a lot more network level\narchitectural variations. Therefore, we propose to search the network level\nstructure in addition to the cell level structure, which forms a hierarchical\narchitecture search space. We present a network level search space that\nincludes many popular designs, and develop a formulation that allows efficient\ngradient-based architecture search (3 P100 GPU days on Cityscapes images). We\ndemonstrate the effectiveness of the proposed method on the challenging\nCityscapes, PASCAL VOC 2012, and ADE20K datasets. Auto-DeepLab, our\narchitecture searched specifically for semantic image segmentation, attains\nstate-of-the-art performance without any ImageNet pretraining." 
- pub_date: { - seconds: 1547078400 - } - authors: "Chenxi Liu" - authors: "Liang-Chieh Chen" - authors: "Florian Schroff" - authors: "Hartwig Adam" - authors: "Wei Hua" - authors: "Alan Yuille" - authors: "Li Fei-Fei" - repositories: { - is_official: true - url: "https://github.com/tensorflow/models" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70333 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/Dawars/auto_deeplab-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 29 - description: "[wip] Implementation of found architecture in Auto Deeplab" - } - repositories: { - url: "https://github.com/MenghaoGuo/AutoDeeplab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 380 - description: "Pytorch Implementation the paper Auto-DeepLab Hierarchical Neural Architecture Search for Semantic Image Segmentation" - } - repositories: { - url: "https://github.com/NoamRosenberg/autodeeplab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 270 - description: "AutoDeeplab / auto-deeplab / AutoML for semantic segmentation, implemented in Pytorch" - } - methods: { - name: "Tanh Activation" - full_name: "Tanh Activation" - description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "LSTM" - full_name: "Long Short-Term Memory" - description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" - } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." 
- } - } - video: { - video_id: "ltlhQXHGzgE" - video_title: "PR-141: Auto-DeepLab: Hierarchical Neural Architecture Search for Semantic Image Segmentation" - number_of_likes: 19 - number_of_views: 1693 - published_date: { - seconds: 1550413961 - } - uploader: "Taekmin Kim" - } - } -} -pr_id_to_video: { - key: 142 - value: { - papers: { - paper_id: "wasserstein-gan" - title: "Wasserstein GAN" - arxiv_id: "1701.07875" - abstract: "We introduce a new algorithm named WGAN, an alternative to traditional GAN\ntraining. In this new model, we show that we can improve the stability of\nlearning, get rid of problems like mode collapse, and provide meaningful\nlearning curves useful for debugging and hyperparameter searches. Furthermore,\nwe show that the corresponding optimization problem is sound, and provide\nextensive theoretical work highlighting the deep connections to other distances\nbetween distributions." - pub_date: { - seconds: 1485388800 - } - authors: "Martin Arjovsky" - authors: "Soumith Chintala" - authors: "Léon Bottou" - repositories: { - url: "https://github.com/bhargavajs07/Packed-Wasserstein-GAN-with-GradientPenalty-Example" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/VitoRazor/Gan_Architecture" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/sanghyun-son/EDSR-PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1654 - description: "PyTorch version of the paper 'Enhanced Deep Residual Networks for Single Image Super-Resolution' (CVPRW 2017) " - } - repositories: { - url: "https://github.com/shekkizh/WassersteinGAN.tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 419 - description: "Tensorflow implementation of Wasserstein GAN - arxiv: https://arxiv.org/abs/1701.07875" - } - repositories: { - url: "https://github.com/lab-ml/annotated_deep_learning_paper_implementations/tree/master/labml_nn/gan/wasserstein" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3053 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." - } - repositories: { - url: "https://github.com/Ars235/Novelty_Detection" - framework: FRAMEWORK_PYTORCH - description: "PyTorch implementation of Adversarially Learned One-Class Classifier for Novelty Detection" - } - repositories: { - url: "https://github.com/ChristophReich1996/Mode_Collapse" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Mode collapse example of GANs in 2D (PyTorch)." 
- } - repositories: { - url: "https://github.com/rkem1542/EDSR-pytorch" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/kynk94/TF2-Image-Generation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 9 - description: "Tensorflow2 reimplementation of image generation model (GAN, Style Transfer, Image to Image Translation, etc)" - } - repositories: { - url: "https://github.com/laowng/GISR" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "From EDSR" - } - methods: { - name: "WGAN" - full_name: "Wasserstein GAN" - description: "**Wasserstein GAN**, or **WGAN**, is a type of generative adversarial network that minimizes an approximation of the Earth-Mover's distance (EM) rather than the Jensen-Shannon divergence as in the original GAN formulation. It leads to more stable training than original GANs with less evidence of mode collapse, as well as meaningful curves that can be used for debugging and searching hyperparameters." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - } - video: { - video_id: "tKQwlf-DAl0" - video_title: "PR-142: Wasserstein GAN" - number_of_likes: 34 - number_of_views: 1980 - published_date: { - seconds: 1550412193 - } - uploader: "Jinsung Yoon" - } - } -} -pr_id_to_video: { - key: 143 - value: { - papers: { - paper_id: "recurrent-world-models-facilitate-policy" - title: "Recurrent World Models Facilitate Policy Evolution" - arxiv_id: "1809.01999" - abstract: "A generative recurrent neural network is quickly trained in an unsupervised\nmanner to model popular reinforcement learning environments through compressed\nspatio-temporal representations. The world model's extracted features are fed\ninto compact and simple policies trained by evolution, achieving state of the\nart results in various environments. We also train our agent entirely inside of\nan environment generated by its own internal world model, and transfer this\npolicy back into the actual environment. Interactive version of paper at\nhttps://worldmodels.github.io" - pub_date: { - seconds: 1536019200 - } - authors: "David Ha" - authors: "Jürgen Schmidhuber" - } - video: { - video_id: "APjGjwBR6o8" - video_title: "PR-143: Recurrent World Models Facilitate Policy Evolution" - number_of_likes: 8 - number_of_views: 550 - published_date: { - seconds: 1551026446 - } - uploader: "정지원" - } - } -} -pr_id_to_video: { - key: 144 - value: { - papers: { - paper_id: "squeezenext-hardware-aware-neural-network" - title: "SqueezeNext: Hardware-Aware Neural Network Design" - arxiv_id: "1803.10615" - abstract: "One of the main barriers for deploying neural networks on embedded systems\nhas been large memory and power consumption of existing neural networks. 
In\nthis work, we introduce SqueezeNext, a new family of neural network\narchitectures whose design was guided by considering previous architectures\nsuch as SqueezeNet, as well as by simulation results on a neural network\naccelerator. This new network is able to match AlexNet's accuracy on the\nImageNet benchmark with $112\\times$ fewer parameters, and one of its deeper\nvariants is able to achieve VGG-19 accuracy with only 4.4 Million parameters,\n($31\\times$ smaller than VGG-19). SqueezeNext also achieves better top-5\nclassification accuracy with $1.3\\times$ fewer parameters as compared to\nMobileNet, but avoids using depthwise-separable convolutions that are\ninefficient on some mobile processor platforms. This wide range of accuracy\ngives the user the ability to make speed-accuracy tradeoffs, depending on the\navailable resources on the target hardware. Using hardware simulation results\nfor power and inference speed on an embedded system has guided us to design\nvariations of the baseline model that are $2.59\\times$/$8.26\\times$ faster and\n$2.25\\times$/$7.5\\times$ more energy efficient as compared to\nSqueezeNet/AlexNet without any accuracy degradation." - pub_date: { - seconds: 1521763200 - } - authors: "Amir Gholami" - authors: "Kiseok Kwon" - authors: "Bichen Wu" - authors: "Zizheng Tai" - authors: "Xiangyu Yue" - authors: "Peter Jin" - authors: "Sicheng Zhao" - authors: "Kurt Keutzer" - repositories: { - url: "https://github.com/osmr/imgclsmob" - framework: FRAMEWORK_OTHERS - number_of_stars: 2200 - description: "Sandbox for training deep learning networks" - } - repositories: { - url: "https://github.com/luuuyi/SqueezeNext.PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 25 - description: "re-implement for paper: SqueezeNext: Hardware-Aware Neural Network Design. (SqueezeNext)" - } - repositories: { - url: "https://github.com/Timen/squeezenext-tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 54 - description: "A tensorflow implementation of squeezenext. (includes link to trained model)" - } - repositories: { - url: "https://github.com/x5675602/SqeezeNet" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/amirgholami/SqueezeNext" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 105 - } - methods: { - name: "Spatially Separable Convolution" - full_name: "Spatially Separable Convolution" - description: "A **Spatially Separable Convolution** decomposes a convolution into two separate operations. In regular convolution, if we have a 3 x 3 kernel then we directly convolve this with the image. We can divide a 3 x 3 kernel into a 3 x 1 kernel and a 1 x 3 kernel. Then, in spatially separable convolution, we first convolve the 3 x 1 kernel then the 1 x 3 kernel. This requires 6 instead of 9 parameters compared to regular convolution, and so it is more parameter efficient (additionally less matrix multiplications are required).\r\n\r\nImage Source: [Kunlun Bai](https://towardsdatascience.com/a-comprehensive-introduction-to-different-types-of-convolutions-in-deep-learning-669281e58215)" - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. 
$w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "Fire Module" - full_name: "Fire Module" - description: "A **Fire Module** is a building block for convolutional neural networks, notably used as part of [SqueezeNet](https://paperswithcode.com/method/squeezenet). A Fire module is comprised of: a squeeze convolution layer (which has only 1x1 filters), feeding into an expand layer that has a mix of 1x1 and 3x3 convolution filters. We expose three tunable dimensions (hyperparameters) in a Fire module: $s\\_{1x1}$, $e\\_{1x1}$, and $e\\_{3x3}$. In a Fire module, $s\\_{1x1}$ is the number of filters in the squeeze layer (all 1x1), $e\\_{1x1}$ is the number of 1x1 filters in the expand layer, and $e\\_{3x3}$ is the number of 3x3 filters in the expand layer. When we use Fire modules we set $s\\_{1x1}$ to be less than ($e\\_{1x1}$ + $e\\_{3x3}$), so the squeeze layer helps to limit the number of input channels to the 3x3 filters." - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "SqueezeNet" - full_name: "SqueezeNet" - description: "**SqueezeNet** is a convolutional neural network that employs design strategies to reduce the number of parameters, notably with the use of fire modules that \"squeeze\" parameters using 1x1 convolutions." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Xavier Initialization" - full_name: "Xavier Initialization" - description: "**Xavier Initialization**, or **Glorot Initialization**, is an initialization scheme for neural networks. Biases are initialized be 0 and the weights $W\\_{ij}$ at each layer are initialized as:\r\n\r\n$$ W\\_{ij} \\sim U\\left[-\\frac{1}{\\sqrt{n}}, \\frac{1}{\\sqrt{n}}\\right] $$\r\n\r\nWhere $U$ is a uniform distribution and $n$ is the size of the previous layer (number of columns in $W$)." - } - } - video: { - video_id: "WReWeADJ3Pw" - video_title: "PR-144: SqueezeNext: Hardware-Aware Neural Network Design" - number_of_likes: 33 - number_of_views: 2014 - published_date: { - seconds: 1551018415 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 145 - value: { - papers: { - paper_id: "visualizing-attention-in-transformer-based" - title: "Visualizing Attention in Transformer-Based Language Representation Models" - arxiv_id: "1904.02679" - abstract: "We present an open-source tool for visualizing multi-head self-attention in\nTransformer-based language representation models. The tool extends earlier work\nby visualizing attention at three levels of granularity: the attention-head\nlevel, the model level, and the neuron level. We describe how each of these\nviews can help to interpret the model, and we demonstrate the tool on the BERT\nmodel and the OpenAI GPT-2 model. We also present three use cases for analyzing\nGPT-2: detecting model bias, identifying recurring patterns, and linking\nneurons to model behavior." 
- pub_date: { - seconds: 1554336000 - } - authors: "Jesse Vig" - methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "Linear Warmup With Cosine Annealing" - full_name: "Linear Warmup With Cosine Annealing" - description: "**Linear Warmup With Cosine Annealing** is a learning rate schedule where we increase the learning rate linearly for $n$ updates and then anneal according to a cosine schedule afterwards." - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. 
$ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - methods: { - name: "BPE" - full_name: "Byte Pair Encoding" - description: "**Byte Pair Encoding**, or **BPE**, is a subword segmentation algorithm that encodes rare and unknown words as sequences of subword units. The intuition is that various word classes are translatable via smaller units than words, for instance names (via character copying or transliteration), compounds (via compositional translation), and cognates and loanwords (via phonological and morphological transformations).\r\n\r\n[Lei Mao](https://leimao.github.io/blog/Byte-Pair-Encoding/) has a detailed blog post that explains how this works." - } - methods: { - name: "Discriminative Fine-Tuning" - full_name: "Discriminative Fine-Tuning" - description: "**Discriminative Fine-Tuning** is a fine-tuning strategy that is used for ULMFiT type models. Instead of using the same learning rate for all layers of the model, discriminative fine-tuning allows us to tune each layer with different learning rates. For context, the regular stochastic gradient descent (SGD) update of a model’s parameters $\\theta$ at time step $t$ looks like the following (Ruder, 2016):\r\n\r\n$$ \\theta\\_{t} = \\theta\\_{t-1} − \\eta\\cdot\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n\r\nwhere $\\eta$ is the learning rate and $\\nabla\\_{\\theta}J\\left(\\theta\\right)$ is the gradient with regard to the model’s objective function. For discriminative fine-tuning, we split the parameters $\\theta$ into {$\\theta\\_{1}, \\ldots, \\theta\\_{L}$} where $\\theta\\_{l}$ contains the parameters of the model at the $l$-th layer and $L$ is the number of layers of the model. Similarly, we obtain {$\\eta\\_{1}, \\ldots, \\eta\\_{L}$} where $\\theta\\_{l}$ where $\\eta\\_{l}$ is the learning rate of the $l$-th layer. The SGD update with discriminative finetuning is then:\r\n\r\n$$ \\theta\\_{t}^{l} = \\theta\\_{t-1}^{l} - \\eta^{l}\\cdot\\nabla\\_{\\theta^{l}}J\\left(\\theta\\right) $$\r\n\r\nThe authors find that empirically it worked well to first choose the learning rate $\\eta^{L}$ of the last layer by fine-tuning only the last layer and using $\\eta^{l-1}=\\eta^{l}/2.6$ as the learning rate for lower layers." - } - methods: { - name: "Attention Dropout" - full_name: "Attention Dropout" - description: "**Attention Dropout** is a type of dropout used in attention-based architectures, where elements are randomly dropped out of the softmax in the attention equation. For example, for scaled-dot product attention, we would drop elements from the first term:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}\\left(\\frac{QK^{T}}{\\sqrt{d_k}}\\right)V $$" - } - methods: { - name: "GELU" - full_name: "Gaussian Error Linear Units" - description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. 
The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." - } - } - video: { - video_id: "f5zULULWUwM" - video_title: "PR-145: Language Models are Unsupervised Multitask Learners (OpenAI GPT-2)" - number_of_likes: 13 - number_of_views: 965 - published_date: { - seconds: 1552226192 - } - uploader: "Young Seok Kim" - } - } -} -pr_id_to_video: { - key: 146 - value: { - papers: { - paper_id: "cornernet-detecting-objects-as-paired" - title: "CornerNet: Detecting Objects as Paired Keypoints" - arxiv_id: "1808.01244" - abstract: "We propose CornerNet, a new approach to object detection where we detect an\nobject bounding box as a pair of keypoints, the top-left corner and the\nbottom-right corner, using a single convolution neural network. By detecting\nobjects as paired keypoints, we eliminate the need for designing a set of\nanchor boxes commonly used in prior single-stage detectors. In addition to our\nnovel formulation, we introduce corner pooling, a new type of pooling layer\nthat helps the network better localize corners. Experiments show that CornerNet\nachieves a 42.2% AP on MS COCO, outperforming all existing one-stage detectors." - pub_date: { - seconds: 1533254400 - } - authors: "Hei Law" - authors: "Jia Deng" - repositories: { - url: "https://github.com/open-mmlab/mmdetection" - framework: FRAMEWORK_PYTORCH - number_of_stars: 15362 - description: "OpenMMLab Detection Toolbox and Benchmark" - } - repositories: { - url: "https://github.com/egeonat/MS-CornerNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "An extension of the CornerNet architecture for RGB+T image inputs" - } - repositories: { - is_official: true - url: "https://github.com/princeton-vl/CornerNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2214 - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. 
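A small illustrative Go sketch of the GELU entry above, computing the exact form x*Phi(x) via erf alongside the tanh approximation it mentions (not part of this repository):

package main

import (
	"fmt"
	"math"
)

// gelu computes x * Phi(x), with Phi the standard normal CDF.
func gelu(x float64) float64 {
	return x * 0.5 * (1 + math.Erf(x/math.Sqrt2))
}

// geluTanh is the tanh approximation quoted in the description above.
func geluTanh(x float64) float64 {
	return 0.5 * x * (1 + math.Tanh(math.Sqrt(2/math.Pi)*(x+0.044715*math.Pow(x, 3))))
}

func main() {
	for _, x := range []float64{-1, 0, 1} {
		fmt.Printf("x=%v exact=%.5f approx=%.5f\n", x, gelu(x), geluTanh(x))
	}
}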
\r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "Random Horizontal Flip" - full_name: "Random Horizontal Flip" - description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "Random Resized Crop" - full_name: "Random Resized Crop" - description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "CornerNet" - full_name: "CornerNet" - description: "**CornerNet** is an object detection model that detects an object bounding box as a pair of keypoints, the top-left corner and the bottom-right corner, using a single convolution neural network. By detecting objects as paired keypoints, we eliminate the need for designing a set of anchor boxes commonly used in prior single-stage detectors. It also utilises corner pooling, a new type of pooling layer than helps the network better localize corners." - } - methods: { - name: "Non Maximum Suppression" - full_name: "Non Maximum Suppression" - description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criteria is usually discarding entities that are below a given probability bound. 
With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where a $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" - } - methods: { - name: "ColorJitter" - full_name: "Color Jitter" - description: "**ColorJitter** is a type of image data augmentation where we randomly change the brightness, contrast and saturation of an image.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "Hourglass Module" - full_name: "Hourglass Module" - description: "An **Hourglass Module** is an image block module used mainly for pose estimation tasks. The design of the hourglass is motivated by the need to capture information at every scale. While local evidence is essential for identifying features like faces and hands, a final pose estimate requires a coherent understanding of the full body. The person’s orientation, the arrangement of their limbs, and the relationships of adjacent joints are among the many cues that are best recognized at different scales in the image. The hourglass is a simple, minimal design that has the capacity to capture all of these features and bring them together to output pixel-wise predictions.\r\n\r\nThe network must have some mechanism to effectively process and consolidate features across scales. The Hourglass uses a single pipeline with skip layers to preserve spatial information at each resolution. The network reaches its lowest resolution at 4x4 pixels allowing smaller spatial filters to be applied that compare features across the entire space of the image.\r\n\r\nThe hourglass is set up as follows: Convolutional and max pooling layers are used to process features down to a very low resolution. At each max pooling step, the network branches off and applies more convolutions at the original pre-pooled resolution. After reaching the lowest resolution, the network begins the top-down sequence of upsampling and combination of features across scales. To bring together information across two adjacent resolutions, we do nearest neighbor upsampling of the lower resolution followed by an elementwise addition of the two sets of features. The topology of the hourglass is symmetric, so for every layer present on the way down there is a corresponding layer going up.\r\n\r\nAfter reaching the output resolution of the network, two consecutive rounds of 1x1 convolutions are applied to produce the final network predictions. The output of the network is a set of heatmaps where for a given heatmap the network predicts the probability of a joint’s presence at each and every pixel." - } - methods: { - name: "Stacked Hourglass Network" - full_name: "Stacked Hourglass Network" - description: "**Stacked Hourglass Networks** are a type of convolutional neural network for pose estimation. They are based on the successive steps of pooling and upsampling that are done to produce a final set of predictions." - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. 
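The Non Maximum Suppression entry above describes the greedy keep-the-highest-score procedure with an IoU >= 0.5 cutoff; a minimal illustrative Go sketch follows (the box struct and the threshold parameter are assumptions for illustration, not code from CornerNet or this repository):

package main

import (
	"fmt"
	"math"
	"sort"
)

type box struct{ x1, y1, x2, y2, score float64 }

// iou returns the intersection-over-union of two axis-aligned boxes.
func iou(a, b box) float64 {
	w := math.Min(a.x2, b.x2) - math.Max(a.x1, b.x1)
	h := math.Min(a.y2, b.y2) - math.Max(a.y1, b.y1)
	if w <= 0 || h <= 0 {
		return 0
	}
	inter := w * h
	union := (a.x2-a.x1)*(a.y2-a.y1) + (b.x2-b.x1)*(b.y2-b.y1) - inter
	return inter / union
}

// nms repeatedly keeps the highest-scoring remaining box and discards any box
// whose IoU with an already kept box is at least thresh.
func nms(boxes []box, thresh float64) []box {
	sort.Slice(boxes, func(i, j int) bool { return boxes[i].score > boxes[j].score })
	var kept []box
	for _, b := range boxes {
		keep := true
		for _, k := range kept {
			if iou(b, k) >= thresh {
				keep = false
				break
			}
		}
		if keep {
			kept = append(kept, b)
		}
	}
	return kept
}

func main() {
	boxes := []box{{0, 0, 10, 10, 0.9}, {1, 1, 11, 11, 0.8}, {20, 20, 30, 30, 0.7}}
	fmt.Println(nms(boxes, 0.5))
}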
Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - } - video: { - video_id: "6OYmOtivQY8" - video_title: "PR-146: CornerNet: Detecting Objects as Paired Keypoints" - number_of_likes: 24 - number_of_views: 1871 - published_date: { - seconds: 1570081370 - } - uploader: "visionNoob" - } - } -} -pr_id_to_video: { - key: 147 - value: { - papers: { - paper_id: "learning-deep-structure-preserving-image-text" - title: "Learning Deep Structure-Preserving Image-Text Embeddings" - arxiv_id: "1511.06078" - abstract: "This paper proposes a method for learning joint embeddings of images and text\nusing a two-branch neural network with multiple layers of linear projections\nfollowed by nonlinearities. The network is trained using a large margin\nobjective that combines cross-view ranking constraints with within-view\nneighborhood structure preservation constraints inspired by metric learning\nliterature. Extensive experiments show that our approach gains significant\nimprovements in accuracy for image-to-text and text-to-image retrieval. Our\nmethod achieves new state-of-the-art results on the Flickr30K and MSCOCO\nimage-sentence datasets and shows promise on the new task of phrase\nlocalization on the Flickr30K Entities dataset." - pub_date: { - seconds: 1447891200 - } - authors: "Liwei Wang" - authors: "Yin Li" - authors: "Svetlana Lazebnik" - } - video: { - video_id: "7lyxexSjshc" - video_title: "PR-147: Learning Deep Structure-Preserving Image-Text Embeddings" - number_of_likes: 20 - number_of_views: 576 - published_date: { - seconds: 1552667121 - } - uploader: "Soyeon Kim" - } - } -} -pr_id_to_video: { - key: 148 - value: { - papers: { - paper_id: "deep-anomaly-detection-using-geometric" - title: "Deep Anomaly Detection Using Geometric Transformations" - arxiv_id: "1805.10917" - abstract: "We consider the problem of anomaly detection in images, and present a new\ndetection technique. Given a sample of images, all known to belong to a\n\"normal\" class (e.g., dogs), we show how to train a deep neural model that can\ndetect out-of-distribution images (i.e., non-dog objects). The main idea behind\nour scheme is to train a multi-class model to discriminate between dozens of\ngeometric transformations applied on all the given images. The auxiliary\nexpertise learned by the model generates feature detectors that effectively\nidentify, at test time, anomalous images based on the softmax activation\nstatistics of the model when applied on transformed images. We present\nextensive experiments using the proposed detector, which indicate that our\nalgorithm improves state-of-the-art methods by a wide margin." - pub_date: { - seconds: 1527465600 - } - authors: "Izhak Golan" - authors: "Ran El-Yaniv" - repositories: { - url: "https://github.com/ninatu/anomaly_detection" - framework: FRAMEWORK_PYTORCH - number_of_stars: 12 - description: "This is the official implementation of \"Anomaly Detection with Deep Perceptual Autoencoders\". 
" - } - repositories: { - is_official: true - url: "https://github.com/izikgo/AnomalyDetectionTransformations" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 129 - description: "A simple and effective method for single-class classification of images" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - } - video: { - video_id: "TgJuUxtLO3s" - video_title: "PR-148 deep anomaly detection using geometric transformations" - number_of_likes: 25 - number_of_views: 1628 - published_date: { - seconds: 1552831505 - } - uploader: "MinGuk Kang" - } - } -} -pr_id_to_video: { - key: 149 - value: { - papers: { - paper_id: "perceptual-losses-for-real-time-style" - title: "Perceptual Losses for Real-Time Style Transfer and Super-Resolution" - arxiv_id: "1603.08155" - abstract: "We consider image transformation problems, where an input image is\ntransformed into an output image. Recent methods for such problems typically\ntrain feed-forward convolutional neural networks using a \\emph{per-pixel} loss\nbetween the output and ground-truth images. Parallel work has shown that\nhigh-quality images can be generated by defining and optimizing\n\\emph{perceptual} loss functions based on high-level features extracted from\npretrained networks. We combine the benefits of both approaches, and propose\nthe use of perceptual loss functions for training feed-forward networks for\nimage transformation tasks. We show results on image style transfer, where a\nfeed-forward network is trained to solve the optimization problem proposed by\nGatys et al in real-time. Compared to the optimization-based method, our\nnetwork gives similar qualitative results but is three orders of magnitude\nfaster. We also experiment with single-image super-resolution, where replacing\na per-pixel loss with a perceptual loss gives visually pleasing results." - pub_date: { - seconds: 1459036800 - } - authors: "Justin Johnson" - authors: "Alexandre Alahi" - authors: "Li Fei-Fei" - repositories: { - url: "https://github.com/Josien94/MLiP" - framework: FRAMEWORK_TENSORFLOW - description: "This repository contains code and supplementary material for participated Kaggle Challenges." - } - repositories: { - url: "https://github.com/Arthur-ZHAO-001/Fast-style-transfer" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/milmor/perceptual-losses-neural-style" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "Perceptual Losses for Real-Time Style Transfer and Super-Resolution Tensorflow 2 implementation" - } - repositories: { - url: "https://github.com/vijishmadhavan/SkinDeep" - framework: FRAMEWORK_PYTORCH - number_of_stars: 703 - description: "Get Deinked!!" 
- } - repositories: { - url: "https://github.com/back8/github_vijishmadhavan_ArtLine" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/kynk94/TF2-Image-Generation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 9 - description: "Tensorflow2 reimplementation of image generation model (GAN, Style Transfer, Image to Image Translation, etc)" - } - repositories: { - url: "https://github.com/rrrepsac/tb_vc" - framework: FRAMEWORK_PYTORCH - description: "telebot" - } - repositories: { - url: "https://github.com/vijishmadhavan/Toon-Me" - framework: FRAMEWORK_PYTORCH - number_of_stars: 316 - description: "A Deep Learning project to Toon Portrait Images" - } - repositories: { - url: "https://github.com/WalterJohnson0/tf-keras-implementation-of-Image-Style-transformation-network" - framework: FRAMEWORK_TENSORFLOW - description: "Computer Vision Final Project- implementation of Neural style transfer" - } - repositories: { - url: "https://github.com/samsh19/ML_project" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - } - } - video: { - video_id: "OKDaGzeUz4U" - video_title: "PR-149: Perceptual Losses for Real-Time Style Transfer and Super-Resolution" - number_of_views: 2116 - published_date: { - seconds: 1552832996 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 150 - value: { - papers: { - paper_id: "imagenet-trained-cnns-are-biased-towards" - title: "ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness" - arxiv_id: "1811.12231" - abstract: "Convolutional Neural Networks (CNNs) are commonly thought to recognise\nobjects by learning increasingly complex representations of object shapes. Some\nrecent studies suggest a more important role of image textures. We here put\nthese conflicting hypotheses to a quantitative test by evaluating CNNs and\nhuman observers on images with a texture-shape cue conflict. We show that\nImageNet-trained CNNs are strongly biased towards recognising textures rather\nthan shapes, which is in stark contrast to human behavioural evidence and\nreveals fundamentally different classification strategies. We then demonstrate\nthat the same standard architecture (ResNet-50) that learns a texture-based\nrepresentation on ImageNet is able to learn a shape-based representation\ninstead when trained on \"Stylized-ImageNet\", a stylized version of ImageNet.\nThis provides a much better fit for human behavioural performance in our\nwell-controlled psychophysical lab setting (nine experiments totalling 48,560\npsychophysical trials across 97 observers) and comes with a number of\nunexpected emergent benefits such as improved object detection performance and\npreviously unseen robustness towards a wide range of image distortions,\nhighlighting advantages of a shape-based representation." - pub_date: { - seconds: 1543449600 - } - authors: "Robert Geirhos" - authors: "Patricia Rubisch" - authors: "Claudio Michaelis" - authors: "Matthias Bethge" - authors: "Felix A. Wichmann" - authors: "Wieland Brendel" - repositories: { - url: "https://github.com/facebookresearch/augmentation-corruption" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9 - description: "This repository provides code for \"On Interaction Between Augmentations and Corruptions in Natural Corruption Robustness\"." 
- } - repositories: { - url: "https://github.com/annstrange/breast-cancer-cnn" - framework: FRAMEWORK_TENSORFLOW - description: "Breast Cancer biopsy image analysis using CNN" - } - repositories: { - url: "https://github.com/LiYingwei/ShapeTextureDebiasedTraining" - framework: FRAMEWORK_PYTORCH - number_of_stars: 71 - description: "Code and models for the paper Shape-Texture Debiased Neural Network Training (ICLR 2021)" - } - repositories: { - is_official: true - url: "https://github.com/rgeirhos/texture-vs-shape" - framework: FRAMEWORK_PYTORCH - number_of_stars: 594 - description: "Pre-trained models, data, code & materials from the paper \"ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness\" (ICLR 2019 Oral)" - } - repositories: { - url: "https://github.com/mbuet2ner/local-global-features-cnn" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Code for my Master's Thesis: \"The Role of Local Versus Global Features in Convolutional Neural Networks\"" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. 
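As an illustrative aside to the Global Average Pooling entry above (not from this repository), a minimal Go sketch that collapses each channel's feature map to its mean, which is exactly the reduction the description defines:

package main

import "fmt"

// globalAvgPool averages each channel's HxW feature map to a single value.
// The [channel][h][w] layout is an assumption for illustration.
func globalAvgPool(features [][][]float64) []float64 {
	out := make([]float64, len(features))
	for c, fm := range features {
		sum, n := 0.0, 0
		for _, row := range fm {
			for _, v := range row {
				sum += v
				n++
			}
		}
		out[c] = sum / float64(n)
	}
	return out
}

func main() {
	fm := [][][]float64{{{1, 2}, {3, 4}}, {{0, 0}, {0, 8}}}
	fmt.Println(globalAvgPool(fm)) // [2.5 2]
}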
The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. 
It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - } - video: { - video_id: "oBapZTL8LsE" - video_title: "PR-150: ImageNet-trained CNNs are Biased Towards Textures" - number_of_likes: 17 - number_of_views: 1110 - published_date: { - seconds: 1553435404 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 151 - value: { - papers: { - paper_id: "the-unreasonable-effectiveness-of-deep" - title: "The Unreasonable Effectiveness of Deep Features as a Perceptual Metric" - arxiv_id: "1801.03924" - abstract: "While it is nearly effortless for humans to quickly assess the perceptual\nsimilarity between two images, the underlying processes are thought to be quite\ncomplex. Despite this, the most widely used perceptual metrics today, such as\nPSNR and SSIM, are simple, shallow functions, and fail to account for many\nnuances of human perception. Recently, the deep learning community has found\nthat features of the VGG network trained on ImageNet classification has been\nremarkably useful as a training loss for image synthesis. But how perceptual\nare these so-called \"perceptual losses\"? 
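The Batch Normalization entry above lists the minibatch equations; a minimal illustrative Go sketch of those equations for a single feature over a minibatch follows (gamma, beta and eps are the learnable scale, learnable shift, and stability constant; this is not code from this repository):

package main

import (
	"fmt"
	"math"
)

// batchNorm normalizes x over the minibatch, then scales by gamma and shifts by beta.
func batchNorm(x []float64, gamma, beta, eps float64) []float64 {
	m := float64(len(x))
	mean := 0.0
	for _, v := range x {
		mean += v
	}
	mean /= m
	variance := 0.0
	for _, v := range x {
		variance += (v - mean) * (v - mean)
	}
	variance /= m
	out := make([]float64, len(x))
	for i, v := range x {
		out[i] = gamma*(v-mean)/math.Sqrt(variance+eps) + beta
	}
	return out
}

func main() {
	fmt.Println(batchNorm([]float64{1, 2, 3, 4}, 1, 0, 1e-5))
}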
What elements are critical for their\nsuccess? To answer these questions, we introduce a new dataset of human\nperceptual similarity judgments. We systematically evaluate deep features\nacross different architectures and tasks and compare them with classic metrics.\nWe find that deep features outperform all previous metrics by large margins on\nour dataset. More surprisingly, this result is not restricted to\nImageNet-trained VGG features, but holds across different deep architectures\nand levels of supervision (supervised, self-supervised, or even unsupervised).\nOur results suggest that perceptual similarity is an emergent property shared\nacross deep visual representations." - pub_date: { - seconds: 1515628800 - } - authors: "Richard Zhang" - authors: "Phillip Isola" - authors: "Alexei A. Efros" - authors: "Eli Shechtman" - authors: "Oliver Wang" - repositories: { - url: "https://github.com/tding1/CDFI" - framework: FRAMEWORK_PYTORCH - number_of_stars: 53 - description: "Code of paper \"CDFI: Compression-Driven Network Design for Frame Interpolation\", CVPR 2021" - } - repositories: { - url: "https://github.com/RudreshVeerkhare/StyleGan" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/cassava-math-ubb/experiments" - framework: FRAMEWORK_TENSORFLOW - description: "This repo contains our experimental approaches. " - } - repositories: { - url: "https://github.com/ak9250/stylegan-art" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 342 - description: "train stylegan through transfer learning" - } - repositories: { - url: "https://github.com/ayushgupta9198/stylegan" - framework: FRAMEWORK_TENSORFLOW - description: "The model is based for fake person creation based on stylegan technique. I have trained the model on my data set and generates the result basis of grids and seeds" - } - repositories: { - url: "https://github.com/isaacschaal/SG_training" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/stefkim/stylegan-batik" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/MrWednes/CopyNVlab" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/khurram702/StyleBasedGAN" - framework: FRAMEWORK_TENSORFLOW - description: "Style Base Architecture of Generator" - } - repositories: { - url: "https://github.com/ayushgupta9198/gan" - framework: FRAMEWORK_TENSORFLOW - description: "The model is based for fake person creation based on stylegan technique. I have trained the model on my data set and generates the result basis of grids and seeds" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. 
Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "VGG" - full_name: "VGG" - description: "**VGG** is a classical convolutional neural network architecture. It was based on an analysis of how to increase the depth of such networks. The network utilises small 3 x 3 filters. Otherwise the network is characterized by its simplicity: the only other components being pooling layers and a fully connected layer.\r\n\r\nImage: [Davi Frossard](https://www.cs.toronto.edu/frossard/post/vgg16/)" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - } - video: { - video_id: "VDeJFb5jt5M" - video_title: "PR-151: The Unreasonable Effectiveness of Deep Features as a Perceptual Metric" - number_of_likes: 6 - number_of_views: 715 - published_date: { - seconds: 1553438571 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 152 - value: { - papers: { - paper_id: "stargan-unified-generative-adversarial" - title: "StarGAN: Unified Generative Adversarial Networks for Multi-Domain Image-to-Image Translation" - arxiv_id: "1711.09020" - abstract: "Recent studies have shown remarkable success in image-to-image translation\nfor two domains. However, existing approaches have limited scalability and\nrobustness in handling more than two domains, since different models should be\nbuilt independently for every pair of image domains. To address this\nlimitation, we propose StarGAN, a novel and scalable approach that can perform\nimage-to-image translations for multiple domains using only a single model.\nSuch a unified model architecture of StarGAN allows simultaneous training of\nmultiple datasets with different domains within a single network. This leads to\nStarGAN's superior quality of translated images compared to existing models as\nwell as the novel capability of flexibly translating an input image to any\ndesired target domain. We empirically demonstrate the effectiveness of our\napproach on a facial attribute transfer and a facial expression synthesis\ntasks." - pub_date: { - seconds: 1511481600 - } - authors: "Yunjey Choi" - authors: "Minje Choi" - authors: "Munyoung Kim" - authors: "Jung-Woo Ha" - authors: "Sunghun Kim" - authors: "Jaegul Choo" - repositories: { - url: "https://github.com/Kal213/StarGAN-Tutorial-Tensorflow-2.3" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "Intuitive StarGAN Code written in Tensorflow 2.3" - } - repositories: { - url: "https://github.com/MACderRu/StarGan_pytorch" - framework: FRAMEWORK_PYTORCH - description: "My implementation of StarGan paper" - } - repositories: { - url: "https://github.com/Masao-Taketani/StarGAN-tf2" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "TensorFlow 2 Implementation of \"StarGAN: Unified Generative Adversarial Networks for Multi-Domain Image-to-Image Translation\"." 
- } - repositories: { - url: "https://github.com/hello-world-cc/starGANv1-Pytorch" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/shaominghe/stargan_adience" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/stevebong31/stargan" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/nguyen-nhat-anh/Star-GAN" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/shridhivyah/starGAN" - framework: FRAMEWORK_TENSORFLOW - description: "FaceAttributeChange_StarGAN" - } - repositories: { - url: "https://github.com/aditiasthana1004/StarGAN" - framework: FRAMEWORK_OTHERS - description: "StarGAN" - } - repositories: { - url: "https://github.com/MasterXiYu/stargan_mul" - framework: FRAMEWORK_PYTORCH - description: "face_for_stargan" - } - } - video: { - video_id: "i3-rTEFpyv0" - video_title: "PR-152:StarGAN: Unified Generative Adversarial Networks for Multi-Domain Image-to-Image Translation" - number_of_likes: 21 - number_of_views: 1591 - published_date: { - seconds: 1554040628 - } - uploader: "Jinsung Yoon" - } - } -} -pr_id_to_video: { - key: 153 - value: { - papers: { - paper_id: "a-simple-neural-attentive-meta-learner" - title: "A Simple Neural Attentive Meta-Learner" - arxiv_id: "1707.03141" - abstract: "Deep neural networks excel in regimes with large amounts of data, but tend to\nstruggle when data is scarce or when they need to adapt quickly to changes in\nthe task. In response, recent work in meta-learning proposes training a\nmeta-learner on a distribution of similar tasks, in the hopes of generalization\nto novel but related tasks by learning a high-level strategy that captures the\nessence of the problem it is asked to solve. However, many recent meta-learning\napproaches are extensively hand-designed, either using architectures\nspecialized to a particular application, or hard-coding algorithmic components\nthat constrain how the meta-learner solves the task. We propose a class of\nsimple and generic meta-learner architectures that use a novel combination of\ntemporal convolutions and soft attention; the former to aggregate information\nfrom past experience and the latter to pinpoint specific pieces of information.\nIn the most extensive set of meta-learning experiments to date, we evaluate the\nresulting Simple Neural AttentIve Learner (or SNAIL) on several\nheavily-benchmarked tasks. On all tasks, in both supervised and reinforcement\nlearning, SNAIL attains state-of-the-art performance by significant margins." 
- pub_date: { - seconds: 1499731200 - } - authors: "Nikhil Mishra" - authors: "Mostafa Rohaninejad" - authors: "Xi Chen" - authors: "Pieter Abbeel" - repositories: { - url: "https://github.com/seujung/SNAIL-gluon" - framework: FRAMEWORK_OTHERS - number_of_stars: 10 - description: "Implementation of SNAIL(A Simple Neural Attentive Meta-Learner) with Gluon" - } - repositories: { - url: "https://github.com/eambutu/snail-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 117 - description: "Implementation of \"A Simple Neural Attentive Meta-Learner\" (SNAIL, https://arxiv.org/pdf/1707.03141.pdf) in PyTorch" - } - repositories: { - url: "https://github.com/Michedev/snail" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "Pytorch implementation of SNAIL (Simple Neural Attentive Meta-Learner)" - } - methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." - } - methods: { - name: "Dilated Causal Convolution" - full_name: "Dilated Causal Convolution" - description: "A **Dilated Causal Convolution** is a causal convolution where the filter is applied over an area larger than its length by skipping input values with a certain step. A dilated causal convolution effectively allows the network to have very large receptive fields with just a few layers." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "SNAIL" - full_name: "Simple Neural Attention Meta-Learner" - description: "The **Simple Neural Attention Meta-Learner**, or **SNAIL**, combines the benefits of temporal convolutions and attention to solve meta-learning tasks. They introduce positional dependence through temporal convolutions to make the model applicable to reinforcement tasks - where the observations, actions, and rewards are intrinsically sequential. They also introduce attention in order to provide pinpoint access over an infinitely large context. SNAIL is constructing by combining the two: we use temporal convolutions to produce the context over which we use a causal attention operation." 
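The SNAIL entry above combines temporal (dilated causal) convolutions with attention; as an illustrative aside (not from this repository or the listed implementations), a minimal Go sketch of a 1-D dilated causal convolution, where each output position sees only current and past inputs spaced by the dilation:

package main

import "fmt"

// dilatedCausalConv1d convolves x with kernel w, looking back dilation steps
// per tap; indices beyond the start of the sequence are treated as zero.
func dilatedCausalConv1d(x, w []float64, dilation int) []float64 {
	out := make([]float64, len(x))
	for t := range x {
		for k, wk := range w {
			idx := t - k*dilation // strictly causal: only positions <= t contribute
			if idx >= 0 {
				out[t] += wk * x[idx]
			}
		}
	}
	return out
}

func main() {
	x := []float64{1, 2, 3, 4, 5, 6}
	fmt.Println(dilatedCausalConv1d(x, []float64{0.5, 0.5}, 2))
}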
- } - } - video: { - video_id: "zGrwpa5-_0Y" - video_title: "PR-153: SNAIL: A Simple Neural Attentive Meta-Learner" - number_of_likes: 11 - number_of_views: 921 - published_date: { - seconds: 1554043097 - } - uploader: "Taekmin Kim" - } - } -} -pr_id_to_video: { - key: 154 - value: { - papers: { - paper_id: "semantic-image-synthesis-with-spatially" - title: "Semantic Image Synthesis with Spatially-Adaptive Normalization" - arxiv_id: "1903.07291" - abstract: "We propose spatially-adaptive normalization, a simple but effective layer for synthesizing photorealistic images given an input semantic layout. Previous methods directly feed the semantic layout as input to the deep network, which is then processed through stacks of convolution, normalization, and nonlinearity layers. We show that this is suboptimal as the normalization layers tend to ``wash away'' semantic information. To address the issue, we propose using the input layout for modulating the activations in normalization layers through a spatially-adaptive, learned transformation. Experiments on several challenging datasets demonstrate the advantage of the proposed method over existing approaches, regarding both visual fidelity and alignment with input layouts. Finally, our model allows user control over both semantic and style. Code is available at https://github.com/NVlabs/SPADE ." - pub_date: { - seconds: 1552867200 - } - authors: "Taesung Park" - authors: "Ming-Yu Liu" - authors: "Ting-Chun Wang" - authors: "Jun-Yan Zhu" - repositories: { - url: "https://github.com/KushajveerSingh/SPADE-PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 22 - description: "PyTorch unofficial implementation of Semantic Image Synthesis with Spatially-Adaptive Normalization paper by Nvidia Research" - } - repositories: { - url: "https://github.com/PacktPublishing/Hands-On-Image-Generation-with-TensorFlow-2.0/tree/master/Chapter06" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 62 - description: "Hands-On Image Generation with TensorFlow 2.0, published by Packt" - } - repositories: { - url: "https://github.com/GrahamRigby/GauGanPlus" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/AhmedAmraniAkdi/BudgetNvidiaGaugan" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/LoganOneal/neuralpaint-server" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/noyoshi/hacksc" - framework: FRAMEWORK_PYTORCH - number_of_stars: 195 - description: "🖌 photorealistic drawings from simple sketches using NVIDIA's GauGAN " - } - repositories: { - url: "https://github.com/noyoshi/smart-sketch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 195 - description: "🖌 photorealistic drawings from simple sketches using NVIDIA's GauGAN " - } - repositories: { - url: "https://github.com/taki0112/SPADE-Tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 312 - description: "Simple Tensorflow implementation of \"Semantic Image Synthesis with Spatially-Adaptive Normalization\" a.k.a. 
GauGAN, SPADE (CVPR 2019 Oral)" - } - repositories: { - url: "https://github.com/Dominioncher/smart-sketch" - framework: FRAMEWORK_PYTORCH - description: "NVidia netural network for sketches" - } - repositories: { - url: "https://github.com/manicman1999/StyleGAN-Keras" - framework: FRAMEWORK_OTHERS - number_of_stars: 160 - description: "StyleGAN made with Keras" - } - } - video: { - video_id: "1nJf35TSYtE" - video_title: "PR-154: Semantic Image Synthesis with Spatially-Adaptive Normalization" - number_of_likes: 19 - number_of_views: 1467 - published_date: { - seconds: 1554651283 - } - uploader: "정지원" - } - } -} -pr_id_to_video: { - key: 155 - value: { - papers: { - paper_id: "exploring-randomly-wired-neural-networks-for" - title: "Exploring Randomly Wired Neural Networks for Image Recognition" - arxiv_id: "1904.01569" - abstract: "Neural networks for image recognition have evolved through extensive manual\ndesign from simple chain-like models to structures with multiple wiring paths.\nThe success of ResNets and DenseNets is due in large part to their innovative\nwiring plans. Now, neural architecture search (NAS) studies are exploring the\njoint optimization of wiring and operation types, however, the space of\npossible wirings is constrained and still driven by manual design despite being\nsearched. In this paper, we explore a more diverse set of connectivity patterns\nthrough the lens of randomly wired neural networks. To do this, we first define\nthe concept of a stochastic network generator that encapsulates the entire\nnetwork generation process. Encapsulation provides a unified view of NAS and\nrandomly wired networks. Then, we use three classical random graph models to\ngenerate randomly wired graphs for networks. The results are surprising:\nseveral variants of these random generators yield network instances that have\ncompetitive accuracy on the ImageNet benchmark. These results suggest that new\nefforts focusing on designing better network generators may lead to new\nbreakthroughs by exploring less constrained search spaces with more room for\nnovel design." 
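The abstract above generates network wirings from classical random graph models; as an illustrative aside (not from this repository), a minimal Go sketch of one such generator, the Erdős–Rényi G(n, p) model, using an adjacency-list representation chosen purely for illustration:

package main

import (
	"fmt"
	"math/rand"
)

// erdosRenyi returns an undirected random graph on n nodes where each possible
// edge is present independently with probability p.
func erdosRenyi(n int, p float64, rng *rand.Rand) [][]int {
	adj := make([][]int, n)
	for i := 0; i < n; i++ {
		for j := i + 1; j < n; j++ {
			if rng.Float64() < p {
				adj[i] = append(adj[i], j)
				adj[j] = append(adj[j], i)
			}
		}
	}
	return adj
}

func main() {
	rng := rand.New(rand.NewSource(1)) // fixed seed so the sample graph is reproducible
	fmt.Println(erdosRenyi(5, 0.4, rng))
}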
- pub_date: { - seconds: 1554163200 - } - authors: "Saining Xie" - authors: "Alexander Kirillov" - authors: "Ross Girshick" - authors: "Kaiming He" - repositories: { - url: "https://github.com/JihaoLee/Randomly_Wired_reproducibility" - framework: FRAMEWORK_PYTORCH - description: "This is a reimplementation of Exploring Randomly Wired Neural Networks for Image Recognition" - } - repositories: { - url: "https://github.com/wolszhang/randWireNN" - framework: FRAMEWORK_OTHERS - description: "compare different randomly wired neural network" - } - repositories: { - url: "https://github.com/swdsld/RandWire_tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 29 - description: "tensorflow implementation of Exploring Randomly Wired Neural Networks for Image Recognition" - } - repositories: { - url: "https://github.com/leaderj1001/RandWireNN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 77 - description: "Implementing Randomly Wired Neural Networks for Image Recognition, Using CIFAR-10 dataset, CIFAR-100 dataset" - } - repositories: { - url: "https://github.com/timctho/random-wired-nn-tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "Tensorflow implementation of \"Exploring Randomly Wired Neural Networks for Image Recognition\"" - } - repositories: { - url: "https://github.com/seungwonpark/RandWireNN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 676 - description: "Implementation of: \"Exploring Randomly Wired Neural Networks for Image Recognition\"" - } - repositories: { - url: "https://github.com/hebo1221/RandWireNN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - description: "Unofficial Implementation of: \"Exploring Randomly Wired Neural Networks for Image Recognition\"" - } - repositories: { - url: "https://github.com/AbdouJaouhar/Exploring-Randomly-Wired-Neural-Networks-for-Image-Recognition" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Neural Architechture Search based on https://arxiv.org/abs/1904.01569" - } - repositories: { - url: "https://github.com/facebookresearch/pycls" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1596 - description: "Codebase for Image Classification Research, written in PyTorch." - } - repositories: { - url: "https://github.com/JiaminRen/RandWireNN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 270 - description: "Pytorch Implementation of: \"Exploring Randomly Wired Neural Networks for Image Recognition\"" - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. 
Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Random Resized Crop" - full_name: "Random Resized Crop" - description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." 
- } - methods: { - name: "SGD with Momentum" - full_name: "SGD with Momentum" - description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Cosine Annealing" - full_name: "Cosine Annealing" - description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. 
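The SGD with Momentum entry above gives the velocity update; a minimal illustrative Go sketch of that update for a single scalar parameter (the struct, names, and toy gradient are assumptions, not code from this repository):

package main

import "fmt"

type sgdMomentum struct {
	lr, gamma float64 // learning rate and momentum coefficient (e.g. 0.9)
	v         float64 // accumulated velocity
}

// step accumulates velocity from the gradient and applies it to the weight,
// following v_t = gamma*v_{t-1} + lr*grad and w_t = w_{t-1} - v_t.
func (s *sgdMomentum) step(w, grad float64) float64 {
	s.v = s.gamma*s.v + s.lr*grad
	return w - s.v
}

func main() {
	opt := &sgdMomentum{lr: 0.1, gamma: 0.9}
	w := 1.0
	for i := 0; i < 3; i++ {
		w = opt.step(w, 2*w) // gradient of w^2, just as a toy objective
	}
	fmt.Println(w)
}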
The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\\eta\\_{t} = \\eta\\_{min}^{i} + \\frac{1}{2}\\left(\\eta\\_{max}^{i}-\\eta\\_{min}^{i}\\right)\\left(1+\\cos\\left(\\frac{T\\_{cur}}{T\\_{i}}\\pi\\right)\\right)\r\n$$\r\n\r\nwhere $\\eta\\_{min}^{i}$ and $\\eta\\_{max}^{i}$ are ranges for the learning rate, and $T\\_{cur}$ accounts for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" - } - methods: { - name: "Label Smoothing" - full_name: "Label Smoothing" - description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" - } - } - video: { - video_id: "qnGm1h365tc" - video_title: "PR-155: Exploring Randomly Wired Neural Networks for Image Recognition" - number_of_likes: 92 - number_of_views: 4138 - published_date: { - seconds: 1554649684 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 156 - value: { - papers: { - paper_id: "channelnets-compact-and-efficient" - title: "ChannelNets: Compact and Efficient Convolutional Neural Networks via Channel-Wise Convolutions" - arxiv_id: "1809.01330" - abstract: "Convolutional neural networks (CNNs) have shown great capability of solving\nvarious artificial intelligence tasks. However, the increasing model size has\nraised challenges in employing them in resource-limited applications. In this\nwork, we propose to compress deep models by using channel-wise convolutions,\nwhich replace dense connections among feature maps with sparse ones in CNNs.\nBased on this novel operation, we build light-weight CNNs known as ChannelNets.\nChannelNets use three instances of channel-wise convolutions; namely group\nchannel-wise convolutions, depth-wise separable channel-wise convolutions, and\nthe convolutional classification layer. Compared to prior CNNs designed for\nmobile devices, ChannelNets achieve a significant reduction in terms of the\nnumber of parameters and computational cost without loss in accuracy. Notably,\nour work represents the first attempt to compress the fully-connected\nclassification layer, which usually accounts for about 25% of total parameters\nin compact CNNs. Experimental results on the ImageNet dataset demonstrate that\nChannelNets achieve consistently better performance compared to prior methods." 
- pub_date: { - seconds: 1536105600 - } - authors: "Hongyang Gao" - authors: "Zhengyang Wang" - authors: "Shuiwang Ji" - repositories: { - url: "https://github.com/osmr/imgclsmob" - framework: FRAMEWORK_OTHERS - number_of_stars: 2200 - description: "Sandbox for training deep learning networks" - } - repositories: { - url: "https://github.com/HongyangGao/ChannelNets" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 77 - description: "Tensorflow Implementation of ChannelNets (NeurIPS 18)" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - } - video: { - video_id: "oZbKWOBfNhk" - video_title: "PR-156: ChannelNets: Compact and Efficient CNN via Channel-Wise Convolutions" - number_of_likes: 1 - number_of_views: 222 - published_date: { - seconds: 1565744830 - } - uploader: "Young Seok Kim" - } - } -} -pr_id_to_video: { - key: 157 - value: { - papers: { - paper_id: "fluid-annotation-a-human-machine" - title: "Fluid Annotation: A Human-Machine Collaboration Interface for Full Image Annotation" - arxiv_id: "1806.07527" - abstract: "We introduce Fluid Annotation, an intuitive human-machine collaboration\ninterface for annotating the class label and outline of every object and\nbackground region in an image. Fluid annotation is based on three principles:\n(I) Strong Machine-Learning aid. We start from the output of a strong neural\nnetwork model, which the annotator can edit by correcting the labels of\nexisting regions, adding new regions to cover missing objects, and removing\nincorrect regions. The edit operations are also assisted by the model. (II)\nFull image annotation in a single pass. As opposed to performing a series of\nsmall annotation tasks in isolation, we propose a unified interface for full\nimage annotation in a single pass. (III) Empower the annotator. We empower the\nannotator to choose what to annotate and in which order. This enables\nconcentrating on what the machine does not already know, i.e. putting human\neffort only on the errors it made. This helps using the annotation budget\neffectively. Through extensive experiments on the COCO+Stuff dataset, we\ndemonstrate that Fluid Annotation leads to accurate annotations very\nefficiently, taking three times less annotation time than the popular LabelMe\ninterface." - pub_date: { - seconds: 1529452800 - } - authors: "Mykhaylo Andriluka" - authors: "Jasper R. R. 
Uijlings" - authors: "Vittorio Ferrari" - } - video: { - video_id: "JbXdn44myP4" - video_title: "PR-157: Best of both worlds: human-machine collaboration for object annotation" - number_of_views: 349 - published_date: { - seconds: 1556532811 - } - uploader: "visionNoob" - } - } -} -pr_id_to_video: { - key: 158 - value: { - papers: { - paper_id: "fots-fast-oriented-text-spotting-with-a" - title: "FOTS: Fast Oriented Text Spotting with a Unified Network" - arxiv_id: "1801.01671" - abstract: "Incidental scene text spotting is considered one of the most difficult and\nvaluable challenges in the document analysis community. Most existing methods\ntreat text detection and recognition as separate tasks. In this work, we\npropose a unified end-to-end trainable Fast Oriented Text Spotting (FOTS)\nnetwork for simultaneous detection and recognition, sharing computation and\nvisual information among the two complementary tasks. Specially, RoIRotate is\nintroduced to share convolutional features between detection and recognition.\nBenefiting from convolution sharing strategy, our FOTS has little computation\noverhead compared to baseline text detection network, and the joint training\nmethod learns more generic features to make our method perform better than\nthese two-stage methods. Experiments on ICDAR 2015, ICDAR 2017 MLT, and ICDAR\n2013 datasets demonstrate that the proposed method outperforms state-of-the-art\nmethods significantly, which further allows us to develop the first real-time\noriented text spotting system which surpasses all previous state-of-the-art\nresults by more than 5% on ICDAR 2015 text spotting task while keeping 22.6\nfps." - pub_date: { - seconds: 1515110400 - } - authors: "Xuebo Liu" - authors: "Ding Liang" - authors: "Shi Yan" - authors: "Dagui Chen" - authors: "Yu Qiao" - authors: "Junjie Yan" - repositories: { - url: "https://github.com/ArashJavan/FOTS" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/Kaushal28/FOTS-PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "PyTorch Implementation of Fast Oriented Text Spotting (FOTS)" - } - repositories: { - url: "https://github.com/xieyufei1993/FOTS" - framework: FRAMEWORK_PYTORCH - number_of_stars: 171 - description: "An Implementation of the FOTS: Fast Oriented Text Spotting with a Unified Network" - } - repositories: { - url: "https://github.com/Pay20Y/FOTS_TF" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 176 - description: "This an implementation of FOTS with tensorflow" - } - repositories: { - url: "https://github.com/yu20103983/FOTS" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 88 - description: "FOTS tensorflow implementation, Include train and test, EAST+Rotate+CRNN. FOTS: Fast Oriented Text Spotting with a Unified Network" - } - repositories: { - url: "https://github.com/jiangxiluning/FOTS.PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 533 - description: "FOTS Pytorch Implementation" - } - repositories: { - url: "https://github.com/Masao-Taketani/FOTS_OCR" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 41 - description: "TensorFlow Implementation of FOTS, Fast Oriented Text Spotting with a Unified Network." 
- } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - } - video: { - video_id: "hOFViMbYnrs" - video_title: "PR-158: FOTS: Fast Oriented Text Spotting with a Unified Network" - number_of_likes: 25 - number_of_views: 990 - published_date: { - seconds: 1556529052 - } - uploader: "Soyeon Kim" - } - } -} -pr_id_to_video: { - key: 159 - value: { - papers: { - paper_id: "synergistic-image-and-feature-adaptation" - title: "Synergistic Image and Feature Adaptation: Towards Cross-Modality Domain Adaptation for Medical Image Segmentation" - arxiv_id: "1901.08211" - abstract: "This paper presents a novel unsupervised domain adaptation framework, called Synergistic Image and Feature Adaptation (SIFA), to effectively tackle the problem of domain shift. Domain adaptation has become an important and hot topic in recent studies on deep learning, aiming to recover performance degradation when applying the neural networks to new testing domains. Our proposed SIFA is an elegant learning diagram which presents synergistic fusion of adaptations from both image and feature perspectives. In particular, we simultaneously transform the appearance of images across domains and enhance domain-invariance of the extracted features towards the segmentation task. The feature encoder layers are shared by both perspectives to grasp their mutual benefits during the end-to-end learning procedure. Without using any annotation from the target domain, the learning of our unified model is guided by adversarial losses, with multiple discriminators employed from various aspects. We have extensively validated our method with a challenging application of cross-modality medical image segmentation of cardiac structures. Experimental results demonstrate that our SIFA model recovers the degraded performance from 17.2% to 73.0%, and outperforms the state-of-the-art methods by a significant margin." - pub_date: { - seconds: 1548288000 - } - authors: "Cheng Chen" - authors: "Qi Dou" - authors: "Hao Chen" - authors: "Jing Qin" - authors: "Pheng-Ann Heng" - repositories: { - is_official: true - url: "https://github.com/cchen-cc/SIFA" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 152 - } - } - video: { - video_id: "sR7hBJGpwQo" - video_title: "PR-159: SIFA: Towards Cross- Modality Domain Adaptation for Medical Image Segmentation" - number_of_likes: 10 - number_of_views: 476 - published_date: { - seconds: 1557132628 - } - uploader: "Sunghoon Joo" - } - } -} -pr_id_to_video: { - key: 160 - value: { - papers: { - paper_id: "glomo-unsupervisedly-learned-relational" - title: "GLoMo: Unsupervisedly Learned Relational Graphs as Transferable Representations" - arxiv_id: "1806.05662" - abstract: "Modern deep transfer learning approaches have mainly focused on learning\ngeneric feature vectors from one task that are transferable to other tasks,\nsuch as word embeddings in language and pretrained convolutional features in\nvision. 
However, these approaches usually transfer unary features and largely\nignore more structured graphical representations. This work explores the\npossibility of learning generic latent relational graphs that capture\ndependencies between pairs of data units (e.g., words or pixels) from\nlarge-scale unlabeled data and transferring the graphs to downstream tasks. Our\nproposed transfer learning framework improves performance on various tasks\nincluding question answering, natural language inference, sentiment analysis,\nand image classification. We also show that the learned graphs are generic\nenough to be transferred to different embeddings on which the graphs have not\nbeen trained (including GloVe embeddings, ELMo embeddings, and task-specific\nRNN hidden unit), or embedding-free units such as image pixels." - pub_date: { - seconds: 1528934400 - } - authors: "Zhilin Yang" - authors: "Jake Zhao" - authors: "Bhuwan Dhingra" - authors: "Kaiming He" - authors: "William W. Cohen" - authors: "Ruslan Salakhutdinov" - authors: "Yann LeCun" - repositories: { - url: "https://github.com/YJHMITWEB/GLoMo-tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 14 - description: "This is a tensorflow implementation of 2018 NIPS paper: [GLoMo: Unsupervisedly Learned Relational Graphs as Transferable Representations.]" - } - methods: { - name: "ELMo" - full_name: "ELMo" - description: "**Embeddings from Language Models**, or **ELMo**, is a type of deep contextualized word representation that models both (1) complex characteristics of word use (e.g., syntax and semantics), and (2) how these uses vary across linguistic contexts (i.e., to model polysemy). Word vectors are learned functions of the internal states of a deep bidirectional language model (biLM), which is pre-trained on a large text corpus.\r\n\r\nA biLM combines both a forward and backward LM. ELMo jointly maximizes the log likelihood of the forward and backward directions. To add ELMo to a supervised model, we freeze the weights of the biLM and then concatenate the ELMo vector $\\textbf{ELMO}^{task}_k$ with $\\textbf{x}_k$ and pass the ELMO enhanced representation $[\\textbf{x}_k; \\textbf{ELMO}^{task}_k]$ into the task RNN. Here $\\textbf{x}_k$ is a context-independent token representation for each token position. \r\n\r\nImage Source: [here](https://medium.com/@duyanhnguyen_38925/create-a-strong-text-classification-with-the-help-from-elmo-e90809ba29da)" - } - methods: { - name: "Tanh Activation" - full_name: "Tanh Activation" - description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "GloVe" - full_name: "GloVe Embeddings" - description: "**GloVe Embeddings** are a type of word embedding that encode the co-occurrence probability ratio between two words as vector differences. GloVe uses a weighted least squares objective $J$ that minimizes the difference between the dot product of the vectors of two words and the logarithm of their number of co-occurrences:\r\n\r\n$$ J=\\sum\\_{i, j=1}^{V}f\\left(𝑋\\_{i j}\\right)(w^{T}\\_{i}\\tilde{w}_{j} + b\\_{i} + \\tilde{b}\\_{j} - \\log{𝑋}\\_{ij})^{2} $$\r\n\r\nwhere $w\\_{i}$ and $b\\_{i}$ are the word vector and bias respectively of word $i$, $\\tilde{w}_{j}$ and $b\\_{j}$ are the context word vector and bias respectively of word $k$, $X\\_{ij}$ is the number of times word $i$ occurs in the context of word $j$, and $f$ is a weighting function that assigns lower weights to rare and frequent co-occurrences." - } - methods: { - name: "BiLSTM" - full_name: "Bidirectional LSTM" - description: "A **Bidirectional LSTM**, or **biLSTM**, is a sequence processing model that consists of two LSTMs: one taking the input in a forward direction, and the other in a backwards direction. BiLSTMs effectively increase the amount of information available to the network, improving the context available to the algorithm (e.g. knowing what words immediately follow *and* precede a word in a sentence).\r\n\r\nImage Source: Modelling Radiological Language with Bidirectional Long Short-Term Memory Networks, Cornegruta et al" - } - methods: { - name: "LSTM" - full_name: "Long Short-Term Memory" - description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" - } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - } - video: { - video_id: "V9KusGzsx10" - video_title: "PR-160: GLoMo Unsupervised Learning of Transferable Relational Graph" - number_of_likes: 13 - number_of_views: 622 - published_date: { - seconds: 1557076158 - } - uploader: "Doyup Lee" - } - } -} -pr_id_to_video: { - key: 161 - value: { - papers: { - paper_id: "transformer-xl-attentive-language-models" - title: "Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context" - arxiv_id: "1901.02860" - abstract: "Transformers have a potential of learning longer-term dependency, but are limited by a fixed-length context in the setting of language modeling. 
We propose a novel neural architecture Transformer-XL that enables learning dependency beyond a fixed length without disrupting temporal coherence. It consists of a segment-level recurrence mechanism and a novel positional encoding scheme. Our method not only enables capturing longer-term dependency, but also resolves the context fragmentation problem. As a result, Transformer-XL learns dependency that is 80% longer than RNNs and 450% longer than vanilla Transformers, achieves better performance on both short and long sequences, and is up to 1,800+ times faster than vanilla Transformers during evaluation. Notably, we improve the state-of-the-art results of bpc/perplexity to 0.99 on enwiki8, 1.08 on text8, 18.3 on WikiText-103, 21.8 on One Billion Word, and 54.5 on Penn Treebank (without finetuning). When trained only on WikiText-103, Transformer-XL manages to generate reasonably coherent, novel text articles with thousands of tokens. Our code, pretrained models, and hyperparameters are available in both Tensorflow and PyTorch." - pub_date: { - seconds: 1546992000 - } - authors: "Zihang Dai" - authors: "Zhilin Yang" - authors: "Yiming Yang" - authors: "Jaime Carbonell" - authors: "Quoc V. Le" - authors: "Ruslan Salakhutdinov" - repositories: { - url: "https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/language_model/transformer-xl" - framework: FRAMEWORK_OTHERS - number_of_stars: 1363 - description: "An NLP library with Awesome pre-trained Transformer models and easy-to-use interface, supporting wide-range of NLP tasks from research to industrial applications." - } - repositories: { - url: "https://github.com/huggingface/transformers" - framework: FRAMEWORK_PYTORCH - number_of_stars: 47573 - description: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." - } - repositories: { - url: "https://github.com/facebookresearch/code-prediction-transformer" - framework: FRAMEWORK_PYTORCH - number_of_stars: 62 - description: "This repo will contain replication package for the paper \"Feeding Trees to Transformers for Code Completion\"" - } - repositories: { - url: "https://github.com/lab-ml/nn/tree/master/labml_nn/transformers/xl" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3053 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." - } - repositories: { - url: "https://github.com/Jmkernes/PAR-Transformer-XL" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "An implementation of the Pay Attention when Required transformer: https://arxiv.org/pdf/2009.04534.pdf" - } - repositories: { - url: "https://github.com/sooftware/conformer" - framework: FRAMEWORK_PYTORCH - number_of_stars: 122 - description: "PyTorch implementation of \"Conformer: Convolution-augmented Transformer for Speech Recognition\" (INTERSPEECH 2020)" - } - repositories: { - url: "https://github.com/sooftware/nlp-attentions" - framework: FRAMEWORK_PYTORCH - number_of_stars: 51 - description: "PyTorch implementation of some attentions for Deep Learning Researchers. 
" - } - repositories: { - url: "https://github.com/sooftware/Attention-Implementation" - framework: FRAMEWORK_PYTORCH - number_of_stars: 51 - description: "PyTorch implementation of some attentions for Deep Learning Researchers. " - } - repositories: { - url: "https://github.com/sh951011/Attention-Implementation" - framework: FRAMEWORK_PYTORCH - number_of_stars: 51 - description: "PyTorch implementation of some attentions for Deep Learning Researchers. " - } - repositories: { - url: "https://github.com/cedrickchee/pytorch-pretrained-BERT" - framework: FRAMEWORK_PYTORCH - number_of_stars: 8 - description: "PyTorch version of Google AI's BERT model with script to load Google's pre-trained models" - } - methods: { - name: "Variational Dropout" - full_name: "Variational Dropout" - description: "**Variational Dropout** is a regularization technique based on [dropout](https://paperswithcode.com/method/dropout), but uses a variational inference grounded approach. In Variational Dropout, we repeat the same dropout mask at each time step for both inputs, outputs, and recurrent layers (drop the same network units at each time step). This is in contrast to ordinary Dropout where different dropout masks are sampled at each time step for the inputs and outputs alone." - } - methods: { - name: "Adaptive Input Representations" - full_name: "Adaptive Input Representations" - description: "**Adaptive Input Embeddings** extend the adaptive softmax to input word representations. The factorization assigns more capacity to frequent words and reduces the capacity for less frequent words with the benefit of reducing overfitting to rare words." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Cosine Annealing" - full_name: "Cosine Annealing" - description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. 
The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\\eta\\_{t} = \\eta\\_{min}^{i} + \\frac{1}{2}\\left(\\eta\\_{max}^{i}-\\eta\\_{min}^{i}\\right)\\left(1+\\cos\\left(\\frac{T\\_{cur}}{T\\_{i}}\\pi\\right)\\right)\r\n$$\r\n\r\nwhere $\\eta\\_{min}^{i}$ and $\\eta\\_{max}^{i}$ are ranges for the learning rate, and $T\\_{cur}$ accounts for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" - } - methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allow for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "Adaptive Softmax" - full_name: "Adaptive Softmax" - description: "**Adaptive Softmax** is a speedup technique for the computation of probability distributions over words. The adaptive softmax is inspired by the class-based hierarchical softmax, where the word classes are built to minimize the computation time. 
Adaptive softmax achieves efficiency by explicitly taking into account the computation time of matrix-multiplication on parallel systems and combining it with a few important observations, namely keeping a shortlist of frequent words in the root node\r\nand reducing the capacity of rare words." - } - methods: { - name: "Layer Normalization" - full_name: "Layer Normalization" - description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." - } - methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - } - video: { - video_id: "lSTljZy8ag4" - video_title: "PR-161: Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context" - number_of_likes: 36 - number_of_views: 2218 - published_date: { - seconds: 1557744220 - } - uploader: "박성남" - } - } -} -pr_id_to_video: { - key: 162 - value: { - papers: { - paper_id: "deeppermnet-visual-permutation-learning" - title: "DeepPermNet: Visual Permutation Learning" - arxiv_id: "1704.02729" - abstract: "We present a principled approach to uncover the structure of visual data by\nsolving a novel deep learning task coined visual permutation learning. The goal\nof this task is to find the permutation that recovers the structure of data\nfrom shuffled versions of it. 
In the case of natural images, this task boils\ndown to recovering the original image from patches shuffled by an unknown\npermutation matrix. Unfortunately, permutation matrices are discrete, thereby\nposing difficulties for gradient-based methods. To this end, we resort to a\ncontinuous approximation of these matrices using doubly-stochastic matrices\nwhich we generate from standard CNN predictions using Sinkhorn iterations.\nUnrolling these iterations in a Sinkhorn network layer, we propose DeepPermNet,\nan end-to-end CNN model for this task. The utility of DeepPermNet is\ndemonstrated on two challenging computer vision problems, namely, (i) relative\nattributes learning and (ii) self-supervised representation learning. Our\nresults show state-of-the-art performance on the Public Figures and OSR\nbenchmarks for (i) and on the classification and segmentation tasks on the\nPASCAL VOC dataset for (ii)." - pub_date: { - seconds: 1491782400 - } - authors: "Rodrigo Santa Cruz" - authors: "Basura Fernando" - authors: "Anoop Cherian" - authors: "Stephen Gould" - } - video: { - video_id: "AqStpR29lTA" - video_title: "PR-162: DeepPermNet: Visual Permutation Learning" - number_of_likes: 2 - number_of_views: 311 - published_date: { - seconds: 1557675917 - } - uploader: "강민국" - } - } -} -pr_id_to_video: { - key: 163 - value: { - papers: { - paper_id: "explainable-cnn-attention-networks-c" - title: "Explainable CNN-attention Networks (C-Attention Network) for Automated Detection of Alzheimer's Disease" - arxiv_id: "2006.14135" - abstract: "In this work, we propose three explainable deep learning architectures to automatically detect patients with Alzheimer`s disease based on their language abilities. The architectures use: (1) only the part-of-speech features; (2) only language embedding features and (3) both of these feature classes via a unified architecture. We use self-attention mechanisms and interpretable 1-dimensional ConvolutionalNeural Network (CNN) to generate two types of explanations of the model`s action: intra-class explanation and inter-class explanation. The inter-class explanation captures the relative importance of each of the different features in that class, while the inter-class explanation captures the relative importance between the classes. Note that although we have considered two classes of features in this paper, the architecture is easily expandable to more classes because of its modularity. Extensive experimentation and comparison with several recent models show that our method outperforms these methods with an accuracy of 92.2% and F1 score of 0.952on the DementiaBank dataset while being able to generate explanations. We show by examples, how to generate these explanations using attention values." - pub_date: { - seconds: 1593043200 - } - authors: "Ning Wang" - authors: "Mingxuan Chen" - authors: "K. P. Subbalakshmi" - } - video: { - video_id: "Dvi5_YC8Yts" - video_title: "PR-163: CNN Attention Networks" - number_of_likes: 125 - number_of_views: 8260 - published_date: { - seconds: 1558274434 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 164 - value: { - papers: { - paper_id: "infovae-information-maximizing-variational" - title: "InfoVAE: Information Maximizing Variational Autoencoders" - arxiv_id: "1706.02262" - abstract: "A key advance in learning generative models is the use of amortized inference\ndistributions that are jointly trained with the models. 
We find that existing\ntraining objectives for variational autoencoders can lead to inaccurate\namortized inference distributions and, in some cases, improving the objective\nprovably degrades the inference quality. In addition, it has been observed that\nvariational autoencoders tend to ignore the latent variables when combined with\na decoding distribution that is too flexible. We again identify the cause in\nexisting training criteria and propose a new class of objectives (InfoVAE) that\nmitigate these problems. We show that our model can significantly improve the\nquality of the variational posterior and can make effective use of the latent\nfeatures regardless of the flexibility of the decoding distribution. Through\nextensive qualitative and quantitative analyses, we demonstrate that our models\noutperform competing approaches on multiple performance metrics." - pub_date: { - seconds: 1496793600 - } - authors: "Shengjia Zhao" - authors: "Jiaming Song" - authors: "Stefano Ermon" - repositories: { - url: "https://github.com/zacheberhart/Convolutional-Disentangled-Variational-Autoencoder" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "A Convolutional β-VAE in PyTorch based loosely off of the Conv VAE used in the World Models research paper." - } - repositories: { - url: "https://github.com/JakobHavtorn/vae" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "VAE in PyTorch" - } - repositories: { - url: "https://github.com/Saswatm123/MMD-VAE" - framework: FRAMEWORK_PYTORCH - number_of_stars: 34 - description: "Pytorch implementation of Maximum Mean Discrepancy Variational Autoencoder, a member of the InfoVAE family that maximizes Mutual Information between the Isotropic Gaussian Prior (as the latent space) and the Data Distribution." - } - repositories: { - url: "https://github.com/AntixK/PyTorch-VAE" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2105 - description: "A Collection of Variational Autoencoders (VAE) in PyTorch." - } - repositories: { - url: "https://github.com/zacheberhart/Maximum-Mean-Discrepancy-Variational-Autoencoder" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 37 - description: "A PyTorch implementation of the MMD-VAE, an Information-Maximizing Variational Autoencoder (InfoVAE) based off of the TensorFlow implementation published by the author of the original InfoVAE paper." - } - } - video: { - video_id: "29QcXLoYC60" - video_title: "PR-164: InfoVAE: Balancing Learning and Inference in Variational Autoencoders" - number_of_views: 593 - published_date: { - seconds: 1558883112 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 165 - value: { - papers: { - paper_id: "few-shot-adversarial-learning-of-realistic" - title: "Few-Shot Adversarial Learning of Realistic Neural Talking Head Models" - arxiv_id: "1905.08233" - abstract: "Several recent works have shown how highly realistic human head images can be obtained by training convolutional neural networks to generate them. In order to create a personalized talking head model, these works require training on a large dataset of images of a single person. However, in many practical scenarios, such personalized talking head models need to be learned from a few image views of a person, potentially even a single image. Here, we present a system with such few-shot capability. 
It performs lengthy meta-learning on a large dataset of videos, and after that is able to frame few- and one-shot learning of neural talking head models of previously unseen people as adversarial training problems with high capacity generators and discriminators. Crucially, the system is able to initialize the parameters of both the generator and the discriminator in a person-specific way, so that training can be based on just a few images and done quickly, despite the need to tune tens of millions of parameters. We show that such an approach is able to learn highly realistic and personalized talking head models of new people and even portrait paintings." - pub_date: { - seconds: 1558310400 - } - authors: "Egor Zakharov" - authors: "Aliaksandra Shysheya" - authors: "Egor Burkov" - authors: "Victor Lempitsky" - repositories: { - url: "https://github.com/krisrjohnson/Realistic-Neural-Talking-Head-Models" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/Ierezell/PapierFewShot" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Reimplementation in pytorch of the paper https://arxiv.org/pdf/1905.08233.pdf" - } - repositories: { - url: "https://github.com/times2049/talkinghead" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/ZVK/Talking-Heads" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - } - repositories: { - url: "https://github.com/shoutOutYangJie/Few-Shot-Adversarial-Learning-for-face-swap" - framework: FRAMEWORK_PYTORCH - number_of_stars: 123 - description: "This is a unofficial re-implementation of the paper \"Few-Shot Adversarial Learning of Realistic Neural Talking Head Models\"" - } - repositories: { - url: "https://github.com/vincent-thevenin/Realistic-Neural-Talking-Head-Models" - framework: FRAMEWORK_PYTORCH - number_of_stars: 643 - description: "My implementation of Few-Shot Adversarial Learning of Realistic Neural Talking Head Models (Egor Zakharov et al.)." - } - repositories: { - url: "https://github.com/ZVK/talking_heads" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - } - repositories: { - url: "https://github.com/grey-eye/talking-heads" - framework: FRAMEWORK_PYTORCH - number_of_stars: 507 - description: "Our implementation of \"Few-Shot Adversarial Learning of Realistic Neural Talking Head Models\" (Egor Zakharov et al.)" - } - } - video: { - video_id: "4pY_6VG4npc" - video_title: "PR-165: Few-Shot Adversarial Learning of Realistic Neural Talking Head Models" - number_of_likes: 40 - number_of_views: 2878 - published_date: { - seconds: 1558879643 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 166 - value: { - papers: { - paper_id: "nas-fpn-learning-scalable-feature-pyramid" - title: "NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection" - arxiv_id: "1904.07392" - abstract: "Current state-of-the-art convolutional architectures for object detection are\nmanually designed. Here we aim to learn a better architecture of feature\npyramid network for object detection. We adopt Neural Architecture Search and\ndiscover a new feature pyramid architecture in a novel scalable search space\ncovering all cross-scale connections. The discovered architecture, named\nNAS-FPN, consists of a combination of top-down and bottom-up connections to\nfuse features across scales. 
NAS-FPN, combined with various backbone models in\nthe RetinaNet framework, achieves better accuracy and latency tradeoff compared\nto state-of-the-art object detection models. NAS-FPN improves mobile detection\naccuracy by 2 AP compared to state-of-the-art SSDLite with MobileNetV2 model in\n[32] and achieves 48.3 AP which surpasses Mask R-CNN [10] detection accuracy\nwith less computation time." - pub_date: { - seconds: 1555372800 - } - authors: "Golnaz Ghiasi" - authors: "Tsung-Yi Lin" - authors: "Ruoming Pang" - authors: "Quoc V. Le" - repositories: { - url: "https://github.com/open-mmlab/mmdetection" - framework: FRAMEWORK_PYTORCH - number_of_stars: 15362 - description: "OpenMMLab Detection Toolbox and Benchmark" - } - repositories: { - url: "https://github.com/tensorflow/tpu/tree/master/models/official/detection" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4338 - description: "Reference models and tools for Cloud TPUs." - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." 
- } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - methods: { - name: "RoIAlign" - full_name: "RoIAlign" - description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." - } - methods: { - name: "Pointwise Convolution" - full_name: "Pointwise Convolution" - description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "MobileNetV2" - full_name: "MobileNetV2" - description: "**MobileNetV2** is a convolutional neural network architecture that seeks to perform well on mobile devices. It is based on an inverted residual structure where the residual connections are between the bottleneck layers. The intermediate expansion layer uses lightweight depthwise convolutions to filter features as a source of non-linearity. 
As a whole, the architecture of MobileNetV2 contains the initial fully convolution layer with 32 filters, followed by 19 residual bottleneck layers." - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - } - video: { - video_id: "FAAt0jejWOA" - video_title: "PR-166: NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection" - number_of_likes: 15 - number_of_views: 2115 - published_date: { - seconds: 1560917270 - } - uploader: "Taekmin Kim" - } - } -} -pr_id_to_video: { - key: 167 - value: { - papers: { - paper_id: "interpretability-beyond-feature-attribution" - title: "Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV)" - arxiv_id: "1711.11279" - abstract: "The interpretation of deep learning models is a challenge due to their size,\ncomplexity, and often opaque internal state. In addition, many systems, such as\nimage classifiers, operate on low-level features rather than high-level\nconcepts. To address these challenges, we introduce Concept Activation Vectors\n(CAVs), which provide an interpretation of a neural net's internal state in\nterms of human-friendly concepts. The key idea is to view the high-dimensional\ninternal state of a neural net as an aid, not an obstacle. We show how to use\nCAVs as part of a technique, Testing with CAVs (TCAV), that uses directional\nderivatives to quantify the degree to which a user-defined concept is important\nto a classification result--for example, how sensitive a prediction of \"zebra\"\nis to the presence of stripes. Using the domain of image classification as a\ntesting ground, we describe how CAVs may be used to explore hypotheses and\ngenerate insights for a standard image classification network as well as a\nmedical application." 
- pub_date: { - seconds: 1512000000 - } - authors: "Been Kim" - authors: "Martin Wattenberg" - authors: "Justin Gilmer" - authors: "Carrie Cai" - authors: "James Wexler" - authors: "Fernanda Viegas" - authors: "Rory Sayres" - repositories: { - url: "https://github.com/jwendyr/tcav" - framework: FRAMEWORK_TENSORFLOW - description: "tcav" - } - repositories: { - url: "https://github.com/giovannimaffei/concept_activation_vectors" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Simple implementation of \"Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV)\", Been Kim et al., 2017 " - } - repositories: { - url: "https://github.com/medgift/iMIMIC-RCVs" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 14 - description: "This repository contains the code for implementing Bidirectional Relevance scores for Digital Histopathology, which was used for the results in the iMIMIC workshop paper: Regression Concept Vectors for Bidirectional Explanations in Histopathology" - } - repositories: { - url: "https://github.com/maragraziani/iMIMIC-RCVs" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "This repository contains the code for implementing Bidirectional Relevance scores for Digital Histopathology, which was used for the results in the iMIMIC workshop paper: Regression Concept Vectors for Bidirectional Explanations in Histopathology" - } - repositories: { - url: "https://github.com/fursovia/tcav_nlp" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "\"Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV)\" paper implementation" - } - repositories: { - url: "https://github.com/pnxenopoulos/cav-keras" - framework: FRAMEWORK_OTHERS - number_of_stars: 9 - description: "Concept activation vectors for Keras" - } - repositories: { - url: "https://github.com/tensorflow/tcav" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 466 - description: "Code for the TCAV ML interpretability project" - } - repositories: { - url: "https://github.com/soumyadip1995/TCAV" - framework: FRAMEWORK_OTHERS - description: " ⚙📲Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV)" - } - } - video: { - video_id: "-y0oghbEHMM" - video_title: "PR-167: Interpretability Beyond Feature Attribution: Testing with Concept Activation Vector (TCAV)" - number_of_likes: 4 - number_of_views: 768 - published_date: { - seconds: 1559486974 - } - uploader: "Jinsung Yoon" - } - } -} -pr_id_to_video: { - key: 168 - value: { - papers: { - paper_id: "few-shot-unsupervised-image-to-image" - title: "Few-Shot Unsupervised Image-to-Image Translation" - arxiv_id: "1905.01723" - abstract: "Unsupervised image-to-image translation methods learn to map images in a given class to an analogous image in a different class, drawing on unstructured (non-registered) datasets of images. While remarkably successful, current methods require access to many images in both source and destination classes at training time. We argue this greatly limits their use. Drawing inspiration from the human capability of picking up the essence of a novel object from a small number of examples and generalizing from there, we seek a few-shot, unsupervised image-to-image translation algorithm that works on previously unseen target classes that are specified, at test time, only by a few example images. 
Our model achieves this few-shot generation capability by coupling an adversarial training scheme with a novel network design. Through extensive experimental validation and comparisons to several baseline methods on benchmark datasets, we verify the effectiveness of the proposed framework. Our implementation and datasets are available at https://github.com/NVlabs/FUNIT ." - pub_date: { - seconds: 1557014400 - } - authors: "Ming-Yu Liu" - authors: "Xun Huang" - authors: "Arun Mallya" - authors: "Tero Karras" - authors: "Timo Aila" - authors: "Jaakko Lehtinen" - authors: "Jan Kautz" - repositories: { - url: "https://github.com/samuelchassot/FUNIT" - framework: FRAMEWORK_PYTORCH - description: "Translate images to unseen domains in the test time with few example images." - } - repositories: { - url: "https://github.com/mkolodny/funit" - framework: FRAMEWORK_PYTORCH - } - repositories: { - is_official: true - url: "https://github.com/NVlabs/FUNIT" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1429 - description: "Translate images to unseen domains in the test time with few example images." - } - repositories: { - url: "https://github.com/taki0112/FUNIT-Tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 62 - description: "Simple Tensorflow implementation of \"Few-Shot Unsupervised Image-to-Image Translation\" (ICCV 2019)" - } - repositories: { - url: "https://github.com/chipsi/FUNIT" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/yaxingwang/SEMIT" - framework: FRAMEWORK_PYTORCH - number_of_stars: 38 - description: "Image to Image translation, image generataton, few shot learning" - } - repositories: { - url: "https://github.com/shaoanlu/fewshot-face-translation-GAN" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 720 - description: "Generative adversarial networks integrating modules from FUNIT and SPADE for face-swapping." - } - repositories: { - url: "https://github.com/sumfish/music-style-transfer" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/cleye/FUNIT-Fringe" - framework: FRAMEWORK_PYTORCH - description: "Using NVlabs FUNIT to make a photobooth transforming your face into an animal. Displayed at FRINGE Festival 2020" - } - } - video: { - video_id: "ANwAhuOeaiE" - video_title: "PR-168: Few Shot Unsupervised Image to Image Translation" - number_of_likes: 11 - number_of_views: 895 - published_date: { - seconds: 1560267339 - } - uploader: "정지원" - } - } -} -pr_id_to_video: { - key: 169 - value: { - papers: { - paper_id: "efficientnet-rethinking-model-scaling-for" - title: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" - arxiv_id: "1905.11946" - abstract: "Convolutional Neural Networks (ConvNets) are commonly developed at a fixed resource budget, and then scaled up for better accuracy if more resources are available. In this paper, we systematically study model scaling and identify that carefully balancing network depth, width, and resolution can lead to better performance. Based on this observation, we propose a new scaling method that uniformly scales all dimensions of depth/width/resolution using a simple yet highly effective compound coefficient. We demonstrate the effectiveness of this method on scaling up MobileNets and ResNet. 
To go even further, we use neural architecture search to design a new baseline network and scale it up to obtain a family of models, called EfficientNets, which achieve much better accuracy and efficiency than previous ConvNets. In particular, our EfficientNet-B7 achieves state-of-the-art 84.3% top-1 accuracy on ImageNet, while being 8.4x smaller and 6.1x faster on inference than the best existing ConvNet. Our EfficientNets also transfer well and achieve state-of-the-art accuracy on CIFAR-100 (91.7%), Flowers (98.8%), and 3 other transfer learning datasets, with an order of magnitude fewer parameters. Source code is at https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet." - pub_date: { - seconds: 1559001600 - } - authors: "Mingxing Tan" - authors: "Quoc V. Le" - repositories: { - url: "https://github.com/Tirth27/Skin-Cancer-Classification-using-Deep-Learning" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Classify Skin cancer from the skin lesion images using Image classification. The dataset for the project is obtained from the Kaggle SIIM-ISIC-Melanoma-Classification competition. " - } - repositories: { - url: "https://github.com/reyvaz/pneumothorax_detection" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Pneumothorax Disease Detection and Segmentation using X-Ray Images" - } - repositories: { - url: "https://github.com/reyvaz/steel-defect-segmentation" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "EfficientNet-Based Unet++ Model for Steel Defect Segmentation" - } - repositories: { - url: "https://github.com/gladwinyjh/FYP-2020" - framework: FRAMEWORK_TENSORFLOW - description: "Deep learning of brain images with EfficientNet" - } - repositories: { - url: "https://github.com/jaketae/mlp-mixer" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "PyTorch implementation of MLP-Mixer: An all-MLP Architecture for Vision" - } - repositories: { - url: "https://github.com/lpirola13/flower-recognizer" - framework: FRAMEWORK_TENSORFLOW - description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." - } - repositories: { - url: "https://github.com/HyeonhoonLee/MAIC2021_Sleep" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "4th place in MAIC2021 Sleep AI Challenge (SleepingDragon)" - } - repositories: { - url: "https://github.com/lukemelas/EfficientNet-PyTorch" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6130 - description: "A PyTorch implementation of EfficientNet and EfficientNetV2 (coming soon!)" - } - repositories: { - url: "https://github.com/BobMcDear/pytorch-efficientnet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - } - repositories: { - url: "https://github.com/facebookresearch/ClassyVision" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1290 - description: "An end-to-end PyTorch framework for image and video classification" - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. 
It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." 
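The bottleneck design described just above is easiest to see with the parameter arithmetic written out. A minimal Go sketch follows (illustrative only; it is not part of the generated database or the metadata-manager code, and the 256-channel width with a 4x reduction is simply the configuration commonly quoted for the deeper ResNets):

package main

import "fmt"

// convWeights returns the weight count of a k x k convolution, ignoring biases.
func convWeights(k, in, out int) int { return k * k * in * out }

func main() {
	c := 256     // channels entering and leaving the block (assumed)
	mid := c / 4 // reduced width inside the bottleneck

	// Plain residual block: two 3x3 convolutions at full width.
	plain := convWeights(3, c, c) + convWeights(3, c, c)

	// Bottleneck block: 1x1 reduce, 3x3 at reduced width, 1x1 expand.
	bottleneck := convWeights(1, c, mid) + convWeights(3, mid, mid) + convWeights(1, mid, c)

	fmt.Println("plain 3x3-3x3 block weights:", plain)      // 1179648
	fmt.Println("bottleneck block weights:   ", bottleneck) // 69632
}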
- } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - methods: { - name: "AutoAugment" - full_name: "AutoAugment" - description: "**AutoAugment** is an automated approach to find data augmentation policies from data. It formulates the problem of finding the best augmentation policy as a discrete search problem. It consists of two components: a search algorithm and a search space. \r\n\r\nAt a high level, the search algorithm (implemented as a controller RNN) samples a data augmentation policy $S$, which has information about what image processing operation to use, the probability of using the operation in each batch, and the magnitude of the operation. The policy $S$ is used to train a neural network with a fixed architecture, whose validation accuracy $R$ is sent back to update the controller. Since $R$ is not differentiable, the controller will be updated by policy gradient methods. \r\n\r\nThe operations used are from PIL, a popular Python image library: all functions in PIL that accept an image as input and output an image. It additionally uses two other augmentation techniques: Cutout and SamplePairing. The operations searched over are ShearX/Y, TranslateX/Y, Rotate, AutoContrast, Invert, Equalize, Solarize, Posterize, Contrast, Color, Brightness, Sharpness, Cutout and Sample Pairing." - } - methods: { - name: "Squeeze-and-Excitation Block" - full_name: "Squeeze-and-Excitation Block" - description: "The **Squeeze-and-Excitation Block** is an architectural unit designed to improve the representational power of a network by enabling it to perform dynamic channel-wise feature recalibration. The process is:\r\n\r\n- The block has a convolutional block as an input.\r\n- Each channel is \"squeezed\" into a single numeric value using average pooling.\r\n- A dense layer followed by a ReLU adds non-linearity and output channel complexity is reduced by a ratio.\r\n- Another dense layer followed by a sigmoid gives each channel a smooth gating function.\r\n- Finally, we weight each feature map of the convolutional block based on the side network; the \"excitation\"." - } - methods: { - name: "Pointwise Convolution" - full_name: "Pointwise Convolution" - description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" - } - methods: { - name: "Stochastic Depth" - full_name: "Stochastic Depth" - description: "**Stochastic Depth** aims to shrink the depth of a network during training, while\r\nkeeping it unchanged during testing. 
This is achieved by randomly dropping entire [ResBlocks](https://paperswithcode.com/method/residual-block) during training and bypassing their transformations through skip connections. \r\n\r\nLet $b\\_{l} \\in$ {$0, 1$} denote a Bernoulli random variable, which indicates whether the $l$th ResBlock is active ($b\\_{l} = 1$) or inactive ($b\\_{l} = 0$). Further, let us denote the “survival” probability of ResBlock $l$ as $p\\_{l} = \\text{Pr}\\left(b\\_{l} = 1\\right)$. With this definition we can bypass the $l$th ResBlock by multiplying its function $f\\_{l}$ with $b\\_{l}$ and we extend the update rule to:\r\n\r\n$$ H\\_{l} = \\text{ReLU}\\left(b\\_{l}f\\_{l}\\left(H\\_{l-1}\\right) + \\text{id}\\left(H\\_{l-1}\\right)\\right) $$\r\n\r\nIf $b\\_{l} = 1$, this reduces to the original ResNet update and this ResBlock remains unchanged. If $b\\_{l} = 0$, the ResBlock reduces to the identity function, $H\\_{l} = \\text{id}\\left(H\\_{l-1}\\right)$." - } - } - video: { - video_id: "Vhz0quyvR7I" - video_title: "PR-169: EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" - number_of_likes: 99 - number_of_views: 6331 - published_date: { - seconds: 1560496231 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 170 - value: { - papers: { - paper_id: "deep-residual-learning-for-image-recognition" - title: "Deep Residual Learning for Image Recognition" - arxiv_id: "1512.03385" - abstract: "Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation." - pub_date: { - seconds: 1449705600 - } - authors: "Kaiming He" - authors: "Xiangyu Zhang" - authors: "Shaoqing Ren" - authors: "Jian Sun" - repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/deeplab" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70334 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/vinod377/STN-OCR" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Implementation of \"STN-OCR: A single Neural Network for Text Detection and Text Recognition\" in natural Scenes by Christian Bartz."
- } - repositories: { - url: "https://github.com/facebookresearch/pycls" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1596 - description: "Codebase for Image Classification Research, written in PyTorch." - } - repositories: { - url: "https://github.com/MarkHershey/arxiv-dl" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Command-line arXiv.org Papers Downloader" - } - repositories: { - url: "https://github.com/FrancescoSaverioZuppichini/ResNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 94 - description: "Clean, scalable and easy to use ResNet implementation in Pytorch" - } - repositories: { - url: "https://github.com/Masao-Taketani/FOTS_OCR" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 41 - description: "TensorFlow Implementation of FOTS, Fast Oriented Text Spotting with a Unified Network." - } - repositories: { - url: "https://github.com/amogh7joshi/plant-health-detection" - framework: FRAMEWORK_TENSORFLOW - description: "Detecting plant health using neural networks." - } - repositories: { - url: "https://github.com/tiagoCuervo/JapaNet" - framework: FRAMEWORK_TENSORFLOW - description: "Detection and classification of Kuzushiji characters for the Kuzushiji Recognition Kaggle challenge using CenterNet as detector and multiple classifiers" - } - repositories: { - url: "https://github.com/pytorch/vision" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9293 - description: "Datasets, Transforms and Models specific to Computer Vision" - } - repositories: { - url: "https://github.com/winycg/MCL-OKD" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "Multi-view contrastive learning for online knowledge distillation (MCL-OKD)" - } - methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. 
We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." - } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." - } - methods: { - name: "Random Horizontal Flip" - full_name: "Random Horizontal Flip" - description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "Random Resized Crop" - full_name: "Random Resized Crop" - description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. 
This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "SGD with Momentum" - full_name: "SGD with Momentum" - description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - } - video: { - video_id: "7fSgqlC7Wdo" - video_title: "PR-170: ResNet - Deep Residual Learning for Image Recognition" - number_of_likes: 16 - number_of_views: 1728 - published_date: { - seconds: 1565744287 - } - uploader: "Young Seok Kim" - } - } -} -pr_id_to_video: { - key: 171 - value: { - papers: { - paper_id: "large-margin-softmax-loss-for-convolutional" - title: "Large-Margin Softmax Loss for Convolutional Neural Networks" - arxiv_id: "1612.02295" - abstract: "Cross-entropy loss together with softmax is arguably one of the most common\nused supervision components in convolutional neural networks (CNNs). 
Despite\nits simplicity, popularity and excellent performance, the component does not\nexplicitly encourage discriminative learning of features. In this paper, we\npropose a generalized large-margin softmax (L-Softmax) loss which explicitly\nencourages intra-class compactness and inter-class separability between learned\nfeatures. Moreover, L-Softmax not only can adjust the desired margin but also\ncan avoid overfitting. We also show that the L-Softmax loss can be optimized by\ntypical stochastic gradient descent. Extensive experiments on four benchmark\ndatasets demonstrate that the deeply-learned features with L-softmax loss\nbecome more discriminative, hence significantly boosting the performance on a\nvariety of visual classification and verification tasks." - pub_date: { - seconds: 1481068800 - } - authors: "Weiyang Liu" - authors: "Yandong Wen" - authors: "Zhiding Yu" - authors: "Meng Yang" - repositories: { - is_official: true - url: "https://github.com/wy1iu/LargeMargin_Softmax_Loss" - framework: FRAMEWORK_PYTORCH - number_of_stars: 321 - description: "Implementation for in ICML'16." - } - repositories: { - url: "https://github.com/amirhfarzaneh/lsoftmax-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 141 - description: "The Pytorch Implementation of L-Softmax" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - } - video: { - video_id: "7TugLIfExKM" - video_title: "PR-171: Large margin softmax loss for Convolutional Neural Networks" - number_of_likes: 4 - number_of_views: 1050 - published_date: { - seconds: 1561534996 - } - uploader: "visionNoob" - } - } -} -pr_id_to_video: { - key: 172 - value: { - papers: { - paper_id: "generalized-intersection-over-union-a-metric" - title: "Generalized Intersection over Union: A Metric and A Loss for Bounding Box Regression" - arxiv_id: "1902.09630" - abstract: "Intersection over Union (IoU) is the most popular evaluation metric used in\nthe object detection benchmarks. However, there is a gap between optimizing the\ncommonly used distance losses for regressing the parameters of a bounding box\nand maximizing this metric value. The optimal objective for a metric is the\nmetric itself. In the case of axis-aligned 2D bounding boxes, it can be shown\nthat $IoU$ can be directly used as a regression loss. However, $IoU$ has a\nplateau making it infeasible to optimize in the case of non-overlapping\nbounding boxes. In this paper, we address the weaknesses of $IoU$ by\nintroducing a generalized version as both a new loss and a new metric. By\nincorporating this generalized $IoU$ ($GIoU$) as a loss into the state-of-the\nart object detection frameworks, we show a consistent improvement on their\nperformance using both the standard, $IoU$ based, and new, $GIoU$ based,\nperformance measures on popular object detection benchmarks such as PASCAL VOC\nand MS COCO." 
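The GIoU abstract above defines the generalized metric only in words. A minimal Go sketch of IoU and GIoU for axis-aligned boxes follows, assuming boxes are given as (x1, y1, x2, y2) corners; it is illustrative only (degenerate boxes with zero union are not handled) and is not taken from any of the listed repositories:

package main

import (
	"fmt"
	"math"
)

type box struct{ x1, y1, x2, y2 float64 }

func area(b box) float64 { return math.Max(0, b.x2-b.x1) * math.Max(0, b.y2-b.y1) }

// giou returns (IoU, GIoU). GIoU = IoU - (|C| - |U|)/|C|, where C is the smallest
// box enclosing both inputs; unlike IoU it stays informative when boxes do not overlap.
func giou(a, b box) (float64, float64) {
	inter := area(box{math.Max(a.x1, b.x1), math.Max(a.y1, b.y1), math.Min(a.x2, b.x2), math.Min(a.y2, b.y2)})
	union := area(a) + area(b) - inter
	iou := inter / union
	enclose := area(box{math.Min(a.x1, b.x1), math.Min(a.y1, b.y1), math.Max(a.x2, b.x2), math.Max(a.y2, b.y2)})
	return iou, iou - (enclose-union)/enclose
}

func main() {
	iou, g := giou(box{0, 0, 2, 2}, box{3, 0, 5, 2}) // disjoint boxes: IoU is 0, GIoU is negative
	fmt.Println(iou, g)                              // 0 -0.2
}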
- pub_date: { - seconds: 1551052800 - } - authors: "Hamid Rezatofighi" - authors: "Nathan Tsoi" - authors: "JunYoung Gwak" - authors: "Amir Sadeghian" - authors: "Ian Reid" - authors: "Silvio Savarese" - repositories: { - url: "https://github.com/AnselmC/bamot" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Bundle Adjustment for Multiple Object Tracking" - } - repositories: { - url: "https://github.com/sremes/a2d2" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - repositories: { - url: "https://github.com/JaryHuang/awesome_SSD_FPN_GIoU" - framework: FRAMEWORK_PYTORCH - number_of_stars: 97 - description: "This repository carries out some paper recurring work" - } - repositories: { - url: "https://github.com/OFRIN/Tensorflow_GIoU" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 11 - description: "Generalized Intersection Over Union: A Metric and a Loss for Bounding Box Regression (CVPR2019)" - } - repositories: { - url: "https://github.com/RuiminChen/GIou_loss_caffe" - framework: FRAMEWORK_OTHERS - number_of_stars: 39 - description: "Caffe version Generalized & Distance & Complete Iou loss Implementation for Faster RCNN/FPN bbox regression" - } - repositories: { - url: "https://github.com/RuiminChen/GIouloss_CIouloss_caffe" - framework: FRAMEWORK_OTHERS - number_of_stars: 39 - description: "Caffe version Generalized & Distance & Complete Iou loss Implementation for Faster RCNN/FPN bbox regression" - } - repositories: { - url: "https://github.com/LinRiver/YOLOv3-on-LISA-Traffic-Sign-Detection-with-darknet" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "This project is to improve YOLOv3 performance by GIOU instead of IOU and the integration of conv and batch_normalization layers" - } - repositories: { - url: "https://github.com/kalubin-awym/GIoU-loss-for-RetinaNet" - framework: FRAMEWORK_OTHERS - number_of_stars: 11 - description: "Change smooth L1 loss to GIoU loss for RetinaNet" - } - } - video: { - video_id: "ENZBhDx0kqM" - video_title: "PR-172: Generalized Intersection over Union: A Metric and A Loss for Bounding Box Regression" - number_of_likes: 30 - number_of_views: 1822 - published_date: { - seconds: 1561353991 - } - uploader: "Soyeon Kim" - } - } -} -pr_id_to_video: { - key: 173 - value: { - papers: { - paper_id: "automatic-chemical-design-using-a-data-driven" - title: "Automatic chemical design using a data-driven continuous representation of molecules" - arxiv_id: "1610.02415" - abstract: "We report a method to convert discrete representations of molecules to and\nfrom a multidimensional continuous representation. This model allows us to\ngenerate new molecules for efficient exploration and optimization through\nopen-ended spaces of chemical compounds. A deep neural network was trained on\nhundreds of thousands of existing chemical structures to construct three\ncoupled functions: an encoder, a decoder and a predictor. The encoder converts\nthe discrete representation of a molecule into a real-valued continuous vector,\nand the decoder converts these continuous vectors back to discrete molecular\nrepresentations. The predictor estimates chemical properties from the latent\ncontinuous vector representation of the molecule. 
Continuous representations\nallow us to automatically generate novel chemical structures by performing\nsimple operations in the latent space, such as decoding random vectors,\nperturbing known chemical structures, or interpolating between molecules.\nContinuous representations also allow the use of powerful gradient-based\noptimization to efficiently guide the search for optimized functional\ncompounds. We demonstrate our method in the domain of drug-like molecules and\nalso in the set of molecules with fewer that nine heavy atoms." - pub_date: { - seconds: 1475798400 - } - authors: "Rafael Gómez-Bombarelli" - authors: "Jennifer N. Wei" - authors: "David Duvenaud" - authors: "José Miguel Hernández-Lobato" - authors: "Benjamín Sánchez-Lengeling" - authors: "Dennis Sheberla" - authors: "Jorge Aguilera-Iparraguirre" - authors: "Timothy D. Hirzel" - authors: "Ryan P. Adams" - authors: "Alán Aspuru-Guzik" - repositories: { - url: "https://github.com/Ishan-Kumar2/Molecular_VAE_Pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "PyTorch implementation of the paper \"Automatic Chemical Design Using a Data-Driven Continuous Representation of Molecules\"" - } - repositories: { - url: "https://github.com/TrentBrick/PAE" - framework: FRAMEWORK_PYTORCH - description: "Primary and Tertiary Sequence AutoEncoder" - } - repositories: { - url: "https://github.com/leungjch/drug_VAE" - framework: FRAMEWORK_OTHERS - description: "VAE trained on MOSES SMILES to produce novel molecules with druglike properties." - } - repositories: { - url: "https://github.com/shamelmerchant/keras-molecules" - framework: FRAMEWORK_TENSORFLOW - description: "Auto-encoder network for learning a continuous representation of chemical structures" - } - repositories: { - url: "https://github.com/brettin/keras-molecule" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - is_official: true - url: "https://github.com/aspuru-guzik-group/chemical_vae" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 302 - description: "Code for 10.1021/acscentsci.7b00572, now running on Keras 2.0 and Tensorflow" - } - repositories: { - is_official: true - url: "https://github.com/HIPS/molecule-autoencoder" - framework: FRAMEWORK_OTHERS - number_of_stars: 136 - description: "A project to enable optimization of molecules by transforming them to and from a continuous representation." 
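The abstract above mentions generating molecules through simple operations in the learned latent space, such as interpolating between two encoded molecules. A minimal sketch of that interpolation step, assuming the latent codes are plain float vectors; lerpLatent is a hypothetical helper for illustration, not code from any of the listed repositories:

package main

import "fmt"

// lerpLatent linearly interpolates between two latent vectors a and b at
// parameter t in [0, 1]; decoding points along this path corresponds to the
// "interpolating between molecules" operation described above.
func lerpLatent(a, b []float64, t float64) []float64 {
	out := make([]float64, len(a))
	for i := range a {
		out[i] = (1-t)*a[i] + t*b[i]
	}
	return out
}

func main() {
	fmt.Println(lerpLatent([]float64{0, 1}, []float64{2, 3}, 0.5)) // [1 2]
}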
- } - repositories: { - url: "https://github.com/aksub99/molecular-vae" - framework: FRAMEWORK_PYTORCH - number_of_stars: 20 - description: "Pytorch implementation of the paper \"Automatic Chemical Design Using a Data-Driven Continuous Representation of Molecules\"" - } - repositories: { - url: "https://github.com/tevang/keras-molecules" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - } - video: { - video_id: "hk4e8ZCkNWg" - video_title: "PR-173 : Automatic Chemical Design Using a Data-Driven Continuous Representation of Molecules" - number_of_likes: 10 - number_of_views: 389 - published_date: { - seconds: 1561309066 - } - uploader: "Sunghoon Joo" - } - } -} -pr_id_to_video: { - key: 174 - value: { - video: { - video_id: "yqFDyX4ErSI" - video_title: "PR-174: Restricted Boltzmann Machine and Deep Belief Networks" - number_of_likes: 12 - number_of_views: 977 - published_date: { - seconds: 1561903626 - } - uploader: "MinGuk Kang" - } - } -} -pr_id_to_video: { - key: 175 - value: { - papers: { - paper_id: "xlnet-generalized-autoregressive-pretraining" - title: "XLNet: Generalized Autoregressive Pretraining for Language Understanding" - arxiv_id: "1906.08237" - abstract: "With the capability of modeling bidirectional contexts, denoising autoencoding based pretraining like BERT achieves better performance than pretraining approaches based on autoregressive language modeling. However, relying on corrupting the input with masks, BERT neglects dependency between the masked positions and suffers from a pretrain-finetune discrepancy. In light of these pros and cons, we propose XLNet, a generalized autoregressive pretraining method that (1) enables learning bidirectional contexts by maximizing the expected likelihood over all permutations of the factorization order and (2) overcomes the limitations of BERT thanks to its autoregressive formulation. Furthermore, XLNet integrates ideas from Transformer-XL, the state-of-the-art autoregressive model, into pretraining. Empirically, under comparable experiment settings, XLNet outperforms BERT on 20 tasks, often by a large margin, including question answering, natural language inference, sentiment analysis, and document ranking." - pub_date: { - seconds: 1560902400 - } - authors: "Zhilin Yang" - authors: "Zihang Dai" - authors: "Yiming Yang" - authors: "Jaime Carbonell" - authors: "Ruslan Salakhutdinov" - authors: "Quoc V. Le" - repositories: { - url: "https://github.com/huggingface/transformers" - framework: FRAMEWORK_PYTORCH - number_of_stars: 47573 - description: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." - } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/language_model/xlnet" - framework: FRAMEWORK_OTHERS - number_of_stars: 1363 - description: "An NLP library with Awesome pre-trained Transformer models and easy-to-use interface, supporting wide-range of NLP tasks from research to industrial applications." 
- } - repositories: { - url: "https://github.com/https-seyhan/BugAI" - framework: FRAMEWORK_OTHERS - number_of_stars: 4 - description: "Deep Learning Models (Long Short Term Memory (LSTM), Recurrent Neural Networks (RNN), Convolutional Neural Networks (CNN) for AI based Bug prediction" - } - repositories: { - url: "https://github.com/utterworks/fast-bert" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1593 - description: "Super easy library for BERT based NLP models" - } - repositories: { - url: "https://github.com/zaradana/Fast_BERT" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/studio-ousia/luke" - framework: FRAMEWORK_PYTORCH - number_of_stars: 312 - description: "LUKE -- Language Understanding with Knowledge-based Embeddings" - } - repositories: { - url: "https://github.com/huggingface/xlnet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 25 - description: "XLNet: Generalized Autoregressive Pretraining for Language Understanding" - } - repositories: { - url: "https://github.com/cuhksz-nlp/SAPar" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - } - repositories: { - url: "https://github.com/graykode/xlnet-Pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 512 - description: "Simple XLNet implementation with Pytorch Wrapper" - } - repositories: { - url: "https://github.com/tomgoter/nlp_finalproject" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Repository for Final Project for W266: Natural Language Processing with Deep Learning" - } - methods: { - name: "Variational Dropout" - full_name: "Variational Dropout" - description: "**Variational Dropout** is a regularization technique based on [dropout](https://paperswithcode.com/method/dropout), but uses a variational inference grounded approach. In Variational Dropout, we repeat the same dropout mask at each time step for both inputs, outputs, and recurrent layers (drop the same network units at each time step). This is in contrast to ordinary Dropout where different dropout masks are sampled at each time step for the inputs and outputs alone." - } - methods: { - name: "GELU" - full_name: "Gaussian Error Linear Units" - description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." 
- } - methods: { - name: "Adaptive Input Representations" - full_name: "Adaptive Input Representations" - description: "**Adaptive Input Embeddings** extend the adaptive softmax to input word representations. The factorization assigns more capacity to frequent words and reduces the capacity for less frequent words with the benefit of reducing overfitting to rare words." - } - methods: { - name: "XLNet" - full_name: "XLNet" - description: "**XLNet** is an autoregressive Transformer that leverages the best of both autoregressive language modeling and autoencoding while attempting to avoid their limitations. Instead of using a fixed forward or backward factorization order as in conventional autoregressive models, XLNet maximizes the expected log likelihood of a sequence w.r.t. all possible permutations of the factorization order. Thanks to the permutation operation, the context for each position can consist of tokens from both left and right. In expectation, each position learns to utilize contextual information from all positions, i.e., capturing bidirectional context.\r\n\r\nAdditionally, inspired by the latest advancements in autogressive language modeling, XLNet integrates the segment recurrence mechanism and relative encoding scheme of [Transformer-XL](https://paperswithcode.com/method/transformer-xl) into pretraining, which empirically improves the performance especially for tasks involving a longer text sequence." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "SentencePiece" - full_name: "SentencePiece" - description: "**SentencePiece** is a subword tokenizer and detokenizer for natural language processing. It performs subword segmentation, supporting the byte-pair-encoding (BPE) algorithm and unigram language model, and then converts this text into an id sequence guarantee perfect reproducibility of the normalization and subword segmentation." - } - methods: { - name: "Cosine Annealing" - full_name: "Cosine Annealing" - description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. 
The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\\eta\\_{t} = \\eta\\_{min}^{i} + \\frac{1}{2}\\left(\\eta\\_{max}^{i}-\\eta\\_{min}^{i}\\right)\\left(1+\\cos\\left(\\frac{T\\_{cur}}{T\\_{i}}\\pi\\right)\\right)\r\n$$\r\n\r\nwhere $\\eta\\_{min}^{i}$ and $ \\eta\\_{max}^{i}$ are ranges for the learning rate, and $T\\_{cur}$ accounts for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" - } - methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allow for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "Adaptive Softmax" - full_name: "Adaptive Softmax" - description: "**Adaptive Softmax** is a speedup technique for the computation of probability distributions over words. The adaptive softmax is inspired by the class-based hierarchical softmax, where the word classes are built to minimize the computation time.
Adaptive softmax achieves efficiency by explicitly taking into account the computation time of matrix-multiplication on parallel systems and combining it with a few important observations, namely keeping a shortlist of frequent words in the root node\r\nand reducing the capacity of rare words." - } - } - video: { - video_id: "koj9BKiu1rU" - video_title: "PR-175: XLNet: Generalized Autoregressive Pretraining for Language Understanding" - number_of_likes: 55 - number_of_views: 2413 - published_date: { - seconds: 1561964703 - } - uploader: "박성남" - } - } -} -pr_id_to_video: { - key: 176 - value: { - papers: { - paper_id: "combating-label-noise-in-deep-learning-using" - title: "Combating Label Noise in Deep Learning Using Abstention" - arxiv_id: "1905.10964" - abstract: "We introduce a novel method to combat label noise when training deep neural networks for classification. We propose a loss function that permits abstention during training thereby allowing the DNN to abstain on confusing samples while continuing to learn and improve classification performance on the non-abstained samples. We show how such a deep abstaining classifier (DAC) can be used for robust learning in the presence of different types of label noise. In the case of structured or systematic label noise -- where noisy training labels or confusing examples are correlated with underlying features of the data-- training with abstention enables representation learning for features that are associated with unreliable labels. In the case of unstructured (arbitrary) label noise, abstention during training enables the DAC to be used as an effective data cleaner by identifying samples that are likely to have label noise. We provide analytical results on the loss function behavior that enable dynamic adaption of abstention rates based on learning progress during training. We demonstrate the utility of the deep abstaining classifier for various image classification tasks under different types of label noise; in the case of arbitrary label noise, we show significant improvements over previously published results on multiple image benchmarks. Source code is available at https://github.com/thulas/dac-label-noise" - pub_date: { - seconds: 1558915200 - } - authors: "Sunil Thulasidasan" - authors: "Tanmoy Bhattacharya" - authors: "Jeff Bilmes" - authors: "Gopinath Chennupati" - authors: "Jamal Mohd-Yusof" - repositories: { - url: "https://github.com/eabarnes1010/controlled_abstention_networks" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Neural network loss functions for regression and classification tasks that can say \"I don't know\"." - } - repositories: { - is_official: true - url: "https://github.com/thulas/dac-label-noise" - framework: FRAMEWORK_PYTORCH - number_of_stars: 44 - description: "Label de-noising for deep learning" - } - } - video: { - video_id: "ihbEF6WGlrA" - video_title: "PR-176: Combating Label Noise in Deep Learning using Abstention" - number_of_likes: 18 - number_of_views: 897 - published_date: { - seconds: 1562509657 - } - uploader: "Doyup Lee" - } - } -} -pr_id_to_video: { - key: 177 - value: { - papers: { - paper_id: "framing-u-net-via-deep-convolutional" - title: "Framing U-Net via Deep Convolutional Framelets: Application to Sparse-view CT" - arxiv_id: "1708.08333" - abstract: "X-ray computed tomography (CT) using sparse projection views is a recent\napproach to reduce the radiation dose. 
However, due to the insufficient\nprojection views, an analytic reconstruction approach using the filtered back\nprojection (FBP) produces severe streaking artifacts. Recently, deep learning\napproaches using large receptive field neural networks such as U-Net have\ndemonstrated impressive performance for sparse- view CT reconstruction.\nHowever, theoretical justification is still lacking. Inspired by the recent\ntheory of deep convolutional framelets, the main goal of this paper is,\ntherefore, to reveal the limitation of U-Net and propose new multi-resolution\ndeep learning schemes. In particular, we show that the alternative U- Net\nvariants such as dual frame and the tight frame U-Nets satisfy the so-called\nframe condition which make them better for effective recovery of high frequency\nedges in sparse view- CT. Using extensive experiments with real patient data\nset, we demonstrate that the new network architectures provide better\nreconstruction performance." - pub_date: { - seconds: 1503878400 - } - authors: "Yoseob Han" - authors: "Jong Chul Ye" - repositories: { - url: "https://github.com/hjahan58/framing-u-net" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/jongcye/FramingUNet" - framework: FRAMEWORK_OTHERS - description: "improving U-Net using Frame Theory: Dual-Frame and Tight-Frame U-Nets" - } - repositories: { - is_official: true - url: "https://github.com/hanyoseob/framing-u-net" - framework: FRAMEWORK_OTHERS - number_of_stars: 15 - description: "Deep Convolutional Framelets: A General Deep Learning Framework for Inverse Problems" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. 
Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "U-Net" - full_name: "U-Net" - description: "**U-Net** is an architecture for semantic segmentation. It consists of a contracting path and an expansive path. The contracting path follows the typical architecture of a convolutional network. It consists of the repeated application of two 3x3 convolutions (unpadded convolutions), each followed by a rectified linear unit (ReLU) and a 2x2 max pooling operation with stride 2 for downsampling. At each downsampling step we double the number of feature channels. Every step in the expansive path consists of an upsampling of the feature map followed by a 2x2 convolution (“up-convolution”) that halves the number of feature channels, a concatenation with the correspondingly cropped feature map from the contracting path, and two 3x3 convolutions, each followed by a ReLU. The cropping is necessary due to the loss of border pixels in every convolution. At the final layer a 1x1 convolution is used to map each 64-component feature vector to the desired number of classes. In total the network has 23 convolutional layers." - } - methods: { - name: "Concatenated Skip Connection" - full_name: "Concatenated Skip Connection" - description: "A **Concatenated Skip Connection** is a type of skip connection that seeks to reuse features by concatenating them to new layers, allowing more information to be retained from previous layers of the network. This contrasts with say, residual connections, where element-wise summation is used instead to incorporate information from previous layers. This type of skip connection is prominently used in DenseNets (and also Inception networks), which the Figure to the right illustrates." - } - } - video: { - video_id: "KSJcQlEKI0Q" - video_title: "PR-177: Framing U-Net via Deep Convolutional Framelets" - number_of_views: 584 - published_date: { - seconds: 1562511247 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 178 - value: { - papers: { - paper_id: "semi-supervised-classification-with-graph" - title: "Semi-Supervised Classification with Graph Convolutional Networks" - arxiv_id: "1609.02907" - abstract: "We present a scalable approach for semi-supervised learning on\ngraph-structured data that is based on an efficient variant of convolutional\nneural networks which operate directly on graphs. We motivate the choice of our\nconvolutional architecture via a localized first-order approximation of\nspectral graph convolutions. Our model scales linearly in the number of graph\nedges and learns hidden layer representations that encode both local graph\nstructure and features of nodes. In a number of experiments on citation\nnetworks and on a knowledge graph dataset we demonstrate that our approach\noutperforms related methods by a significant margin." - pub_date: { - seconds: 1473379200 - } - authors: "Thomas N. 
Kipf" - authors: "Max Welling" - repositories: { - url: "https://github.com/andrejmiscic/gcn-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Implementation of the Graph Convolutional Networks in Pytorch" - } - repositories: { - url: "https://github.com/switiz/gnn-gcn-gat" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "example of gnns" - } - repositories: { - url: "https://github.com/hazdzz/GCN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "The PyTorch version of GCN implemented by the paper." - } - repositories: { - url: "https://github.com/LouisDumont/GCN---re-implementation" - framework: FRAMEWORK_PYTORCH - description: "A re-implementation of the Graph Neural Networks described in https://arxiv.org/abs/1609.02907" - } - repositories: { - url: "https://github.com/thanhtrunghuynh93/pygcn" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/lipingcoding/pygcn" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/ChengSashankh/gcn-graph-classification" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/LeeWooJung/GCN_reproduce" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Reproduce GCN in pytorch" - } - repositories: { - url: "https://github.com/dtriepke/Graph_Convolutional_Network" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/Anieca/GCN" - framework: FRAMEWORK_PYTORCH - } - methods: { - name: "Graph Convolutional Networks" - full_name: "Graph Convolutional Networks" - description: "A Graph Convolutional Network, or GCN, is an approach for semi-supervised learning on graph-structured data. It is based on an efficient variant of convolutional neural networks which operate directly on graphs.\r\n\r\nImage source: [Semi-Supervised Classification with Graph Convolutional Networks](https://arxiv.org/pdf/1609.02907v4.pdf)" - } - methods: { - name: "GCN" - full_name: "Graph Convolutional Network" - description: "A **Graph Convolutional Network**, or **GCN**, is an approach for semi-supervised learning on graph-structured data. It is based on an efficient variant of [convolutional neural networks](https://paperswithcode.com/methods/category/convolutional-neural-networks) which operate directly on graphs. The choice of convolutional architecture is motivated via a localized first-order approximation of spectral graph convolutions. The model scales linearly in the number of graph edges and learns hidden layer representations that encode both local graph structure and features of nodes." - } - } - video: { - video_id: "uqBsvoOY8jM" - video_title: "PR-178: Graph Convolutional Network" - number_of_likes: 95 - number_of_views: 6062 - published_date: { - seconds: 1563112484 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 179 - value: { - papers: { - paper_id: "m3d-gan-multi-modal-multi-domain-translation" - title: "M3D-GAN: Multi-Modal Multi-Domain Translation with Universal Attention" - arxiv_id: "1907.04378" - abstract: "Generative adversarial networks have led to significant advances in cross-modal/domain translation. However, typically these networks are designed for a specific task (e.g., dialogue generation or image synthesis, but not both). 
We present a unified model, M3D-GAN, that can translate across a wide range of modalities (e.g., text, image, and speech) and domains (e.g., attributes in images or emotions in speech). Our model consists of modality subnets that convert data from different modalities into unified representations, and a unified computing body where data from different modalities share the same network architecture. We introduce a universal attention module that is jointly trained with the whole network and learns to encode a large range of domain information into a highly structured latent space. We use this to control synthesis in novel ways, such as producing diverse realistic pictures from a sketch or varying the emotion of synthesized speech. We evaluate our approach on extensive benchmark tasks, including image-to-image, text-to-image, image captioning, text-to-speech, speech recognition, and machine translation. Our results show state-of-the-art performance on some of the tasks." - pub_date: { - seconds: 1562630400 - } - authors: "Shuang Ma" - authors: "Daniel McDuff" - authors: "Yale Song" - } - video: { - video_id: "CpRGaFPIZnw" - video_title: "PR-179: M3D-GAN: Multi-Modal Multi-Domain Translation with Universal Attention" - number_of_likes: 8 - number_of_views: 558 - published_date: { - seconds: 1563115146 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 180 - value: { - papers: { - paper_id: "the-lottery-ticket-hypothesis-finding-sparse" - title: "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks" - arxiv_id: "1803.03635" - abstract: "Neural network pruning techniques can reduce the parameter counts of trained\nnetworks by over 90%, decreasing storage requirements and improving\ncomputational performance of inference without compromising accuracy. However,\ncontemporary experience is that the sparse architectures produced by pruning\nare difficult to train from the start, which would similarly improve training\nperformance.\n We find that a standard pruning technique naturally uncovers subnetworks\nwhose initializations made them capable of training effectively. Based on these\nresults, we articulate the \"lottery ticket hypothesis:\" dense,\nrandomly-initialized, feed-forward networks contain subnetworks (\"winning\ntickets\") that - when trained in isolation - reach test accuracy comparable to\nthe original network in a similar number of iterations. The winning tickets we\nfind have won the initialization lottery: their connections have initial\nweights that make training particularly effective.\n We present an algorithm to identify winning tickets and a series of\nexperiments that support the lottery ticket hypothesis and the importance of\nthese fortuitous initializations. We consistently find winning tickets that are\nless than 10-20% of the size of several fully-connected and convolutional\nfeed-forward architectures for MNIST and CIFAR10. Above this size, the winning\ntickets that we find learn faster than the original network and reach higher\ntest accuracy." 
- pub_date: { - seconds: 1520553600 - } - authors: "Jonathan Frankle" - authors: "Michael Carbin" - repositories: { - url: "https://github.com/phiandark/SiftingFeatures" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Code for the paper \"Sifting out the features by pruning: Are convolutional networks the winning lottery ticket of fully connected ones?\"" - } - repositories: { - url: "https://github.com/hdo0947/Lottery-Ticket-Hypothesis" - framework: FRAMEWORK_PYTORCH - description: "Project with Jack Weitze" - } - repositories: { - url: "https://github.com/JingtongSu/sanity-checking-pruning" - framework: FRAMEWORK_PYTORCH - number_of_stars: 21 - description: "Code for Sanity-Checking Pruning Methods: Random Tickets can Win the Jackpot" - } - repositories: { - url: "https://github.com/ARMargolis/melanoma-pytorch" - framework: FRAMEWORK_PYTORCH - description: "Development of a PyTorch model for Kaggle melanoma competition" - } - repositories: { - url: "https://github.com/zhangtj1996/lottery-ticket-hypothesis-Mxnet" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "A reimplementation of \"The Lottery Ticket Hypothesis\" (Frankle and Carbin) by Mxnet for FC network." - } - repositories: { - url: "https://github.com/Taoudi/LotteryTicketHypothesis" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Kevin Ammouri and Youssef Taoudi" - } - repositories: { - url: "https://github.com/COMP6248-Reproducability-Challenge/REPRODUCIBILITY-REPORT-THE-LOTTERY-TICKET-HYPOTHESIS" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/Theys96/lottery-ticket-hypothesis" - framework: FRAMEWORK_TENSORFLOW - description: "Experimentation setup for the \"Lottery Ticket\" hypothesis for neural networks." - } - repositories: { - url: "https://github.com/Happy-Virus-IkBeom/LTH_Tensorflow" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/kenichdietrich/LotteryTicketHypothesis" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Codes to perform LTH with Keras and Tensorflow" - } - } - video: { - video_id: "dkNmYu610r8" - video_title: "PR-180: The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks" - number_of_likes: 11 - number_of_views: 759 - published_date: { - seconds: 1564043119 - } - uploader: "Taekmin Kim" - } - } -} -pr_id_to_video: { - key: 181 - value: { - papers: { - paper_id: "data-shapley-equitable-valuation-of-data-for" - title: "Data Shapley: Equitable Valuation of Data for Machine Learning" - arxiv_id: "1904.02868" - abstract: "As data becomes the fuel driving technological and economic growth, a fundamental challenge is how to quantify the value of data in algorithmic predictions and decisions. For example, in healthcare and consumer markets, it has been suggested that individuals should be compensated for the data that they generate, but it is not clear what is an equitable valuation for individual data. In this work, we develop a principled framework to address data valuation in the context of supervised machine learning. Given a learning algorithm trained on $n$ data points to produce a predictor, we propose data Shapley as a metric to quantify the value of each training datum to the predictor performance. Data Shapley value uniquely satisfies several natural properties of equitable data valuation. 
We develop Monte Carlo and gradient-based methods to efficiently estimate data Shapley values in practical settings where complex learning algorithms, including neural networks, are trained on large datasets. In addition to being equitable, extensive experiments across biomedical, image and synthetic data demonstrate that data Shapley has several other benefits: 1) it is more powerful than the popular leave-one-out or leverage score in providing insight on what data is more valuable for a given learning task; 2) low Shapley value data effectively capture outliers and corruptions; 3) high Shapley value data inform what type of new data to acquire to improve the predictor." - pub_date: { - seconds: 1554422400 - } - authors: "Amirata Ghorbani" - authors: "James Zou" - repositories: { - url: "https://github.com/Weixin-Liang/HERALD" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "HERALD: An Annotation Efficient Method to Train User EngagementPredictors in Dialogs (ACL 2021)" - } - repositories: { - url: "https://github.com/Weixin-Liang/dialog_evaluation_CMADE" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Beyond User Self-Reported Likert Scale Ratings: A Comparison Model for Automatic Dialog Evaluation (ACL 2020)" - } - repositories: { - is_official: true - url: "https://github.com/amiratag/DataShapley" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 132 - description: "Data Shapley: Equitable Valuation of Data for Machine Learning" - } - repositories: { - url: "https://github.com/GISH123/Cathay-Holdings-CIP-Projects-for-Interpretable-Machine-Learning" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "All my two month works for projects toward Interpretable Machine Learning for Cathay(國泰金控 數數發 資料科學研發科 Lab)" - } - } - video: { - video_id: "YdCXbBDuVuE" - video_title: "PR-181: Data Shapley: Equitable Valuation of Data for Machine Learning" - number_of_likes: 7 - number_of_views: 534 - published_date: { - seconds: 1563717023 - } - uploader: "Jinsung Yoon" - } - } -} -pr_id_to_video: { - key: 182 - value: { - papers: { - paper_id: "ensemble-deep-learning-a-review" - title: "Ensemble deep learning: A review" - arxiv_id: "2104.02395" - abstract: "Ensemble learning combines several individual models to obtain better generalization performance. Currently, deep learning models with multilayer processing architecture is showing better performance as compared to the shallow or traditional classification models. Deep ensemble learning models combine the advantages of both the deep learning models as well as the ensemble learning such that the final model has better generalization performance. This paper reviews the state-of-art deep ensemble models and hence serves as an extensive summary for the researchers. The ensemble models are broadly categorised into ensemble models like bagging, boosting and stacking, negative correlation based deep ensemble models, explicit/implicit ensembles, homogeneous /heterogeneous ensemble, decision fusion strategies, unsupervised, semi-supervised, reinforcement learning and online/incremental, multilabel based deep ensemble models. Application of deep ensemble models in different domains is also briefly discussed. Finally, we conclude this paper with some future recommendations and research directions." - pub_date: { - seconds: 1617667200 - } - authors: "M. A. Ganaie" - authors: "Minghui Hu" - authors: "M. Tanveer*" - authors: "P. N. 
Suganthan*" - } - video: { - video_id: "twhZ3j_VCa0" - video_title: "PR-182: Deep Learning Ensemble Method" - number_of_likes: 15 - number_of_views: 775 - published_date: { - seconds: 1564898851 - } - uploader: "정지원" - } - } -} -pr_id_to_video: { - key: 183 - value: { - papers: { - paper_id: "mixnet-mixed-depthwise-convolutional-kernels" - title: "MixConv: Mixed Depthwise Convolutional Kernels" - arxiv_id: "1907.09595" - abstract: "Depthwise convolution is becoming increasingly popular in modern efficient ConvNets, but its kernel size is often overlooked. In this paper, we systematically study the impact of different kernel sizes, and observe that combining the benefits of multiple kernel sizes can lead to better accuracy and efficiency. Based on this observation, we propose a new mixed depthwise convolution (MixConv), which naturally mixes up multiple kernel sizes in a single convolution. As a simple drop-in replacement of vanilla depthwise convolution, our MixConv improves the accuracy and efficiency for existing MobileNets on both ImageNet classification and COCO object detection. To demonstrate the effectiveness of MixConv, we integrate it into AutoML search space and develop a new family of models, named as MixNets, which outperform previous mobile models including MobileNetV2 [20] (ImageNet top-1 accuracy +4.2%), ShuffleNetV2 [16] (+3.5%), MnasNet [26] (+1.3%), ProxylessNAS [2] (+2.2%), and FBNet [27] (+2.0%). In particular, our MixNet-L achieves a new state-of-the-art 78.9% ImageNet top-1 accuracy under typical mobile settings (<600M FLOPS). Code is at https://github.com/ tensorflow/tpu/tree/master/models/official/mnasnet/mixnet" - pub_date: { - seconds: 1563753600 - } - authors: "Mingxing Tan" - authors: "Quoc V. Le" - repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11022 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" - } - repositories: { - url: "https://github.com/osmr/imgclsmob" - framework: FRAMEWORK_OTHERS - number_of_stars: 2200 - description: "Sandbox for training deep learning networks" - } - repositories: { - url: "https://github.com/rwightman/efficientnet-jax" - framework: FRAMEWORK_OTHERS - number_of_stars: 65 - description: "EfficientNet, MobileNetV3, MobileNetV2, MixNet, etc in JAX w/ Flax Linen and Objax" - } - repositories: { - url: "https://github.com/chrisway613/MixConv" - framework: FRAMEWORK_PYTORCH - description: "Mixed Depth-Wise Convolution" - } - repositories: { - url: "https://github.com/neeraj-j/MixNet" - framework: FRAMEWORK_PYTORCH - description: "Pytorch implementation of MixNet" - } - repositories: { - is_official: true - url: "https://github.com/tensorflow/tpu" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4338 - description: "Reference models and tools for Cloud TPUs." - } - repositories: { - url: "https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4338 - description: "Reference models and tools for Cloud TPUs." 
- } - repositories: { - url: "https://github.com/zsef123/MixNet-PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "A PyTorch implementation of MixNet: Mixed Depthwise Convolutional Kernels" - } - repositories: { - url: "https://github.com/JinLi711/Convolution_Variants" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "Reimplementing SOTA convolution variants with Tensorflow 2.0." - } - repositories: { - url: "https://github.com/JinLi711/Attention-Augmented-Convolution" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "Reimplementing SOTA convolution variants with Tensorflow 2.0." - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "MobileNetV1" - full_name: "MobileNetV1" - description: "**MobileNet** is a type of convolutional neural network designed for mobile and embedded vision applications. They are based on a streamlined architecture that uses depthwise separable convolutions to build lightweight deep neural networks that can have low latency for mobile and embedded devices." 
- } - methods: { - name: "Depthwise Separable Convolution" - full_name: "Depthwise Separable Convolution" - description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" - } - methods: { - name: "MixConv" - full_name: "Mixed Depthwise Convolution" - description: "**MixConv**, or **Mixed Depthwise Convolution**, is a type of depthwise convolution that naturally mixes up multiple kernel sizes in a single convolution. It is based on the insight that depthwise convolution applies a single kernel size to all channels, which MixConv overcomes by combining the benefits of multiple kernel sizes. It does this by partitioning channels into groups and applying a different kernel size to each group." - } - methods: { - name: "Grouped Convolution" - full_name: "Grouped Convolution" - description: "A **Grouped Convolution** uses a group of convolutions - multiple kernels per layer - resulting in multiple channel outputs per layer. This leads to wider networks helping a network learn a varied set of low level and high level features. The original motivation of using Grouped Convolutions in [AlexNet](https://paperswithcode.com/method/alexnet) was to distribute the model over multiple GPUs as an engineering compromise. But later, with models such as [ResNeXt](https://paperswithcode.com/method/alexnet), it was shown this module could be used to improve classification accuracy. Specifically by exposing a new dimension through grouped convolutions, *cardinality* (the size of set of transformations), we can increase accuracy by increasing it." - } - methods: { - name: "MixNet" - full_name: "MixNet" - description: "**MixNet** is a type of convolutional neural network discovered via AutoML that utilises MixConvs instead of regular depthwise convolutions." - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." 
- } - methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." - } - } - video: { - video_id: "252YxqpHzsg" - video_title: "PR-183: MixNet: Mixed Depthwise Convolutional Kernels" - number_of_likes: 28 - number_of_views: 1680 - published_date: { - seconds: 1564326548 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 184 - value: { - papers: { - paper_id: "and-the-bit-goes-down-revisiting-the" - title: "And the Bit Goes Down: Revisiting the Quantization of Neural Networks" - arxiv_id: "1907.05686" - abstract: "In this paper, we address the problem of reducing the memory footprint of convolutional network architectures. We introduce a vector quantization method that aims at preserving the quality of the reconstruction of the network outputs rather than its weights. The principle of our approach is that it minimizes the loss reconstruction error for in-domain inputs. Our method only requires a set of unlabelled data at quantization time and allows for efficient inference on CPU by using byte-aligned codebooks to store the compressed weights. We validate our approach by quantizing a high performing ResNet-50 model to a memory size of 5MB (20x compression factor) while preserving a top-1 accuracy of 76.1% on ImageNet object classification and by compressing a Mask R-CNN with a 26x factor." - pub_date: { - seconds: 1562889600 - } - authors: "Pierre Stock" - authors: "Armand Joulin" - authors: "Rémi Gribonval" - authors: "Benjamin Graham" - authors: "Hervé Jégou" - repositories: { - url: "https://github.com/huggingface/block_movement_pruning" - framework: FRAMEWORK_PYTORCH - number_of_stars: 23 - description: "Block Sparse movement pruning" - } - repositories: { - url: "https://github.com/uber-research/permute-quantize-finetune" - framework: FRAMEWORK_PYTORCH - number_of_stars: 89 - description: "Using ideas from product quantization for state-of-the-art neural network compression." - } - repositories: { - is_official: true - url: "https://github.com/facebookresearch/kill-the-bits" - framework: FRAMEWORK_PYTORCH - number_of_stars: 614 - description: "Code for: \"And the bit goes down: Revisiting the quantization of neural networks\"" - } - methods: { - name: "Mask R-CNN" - full_name: "Mask R-CNN" - description: "**Mask R-CNN** extends [Faster R-CNN](http://paperswithcode.com/method/faster-r-cnn) to solve instance segmentation tasks. It achieves this by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. In principle, Mask R-CNN is an intuitive extension of Faster R-CNN, but constructing the mask branch properly is critical for good results. \r\n\r\nMost importantly, Faster R-CNN was not designed for pixel-to-pixel alignment between network inputs and outputs. This is evident in how [RoIPool](http://paperswithcode.com/method/roi-pooling), the *de facto* core operation for attending to instances, performs coarse spatial quantization for feature extraction. 
To fix the misalignment, Mask R-CNN utilises a simple, quantization-free layer, called [RoIAlign](http://paperswithcode.com/method/roi-align), that faithfully preserves exact spatial locations. \r\n\r\nSecondly, Mask R-CNN *decouples* mask and class prediction: it predicts a binary mask for each class independently, without competition among classes, and relies on the network's RoI classification branch to predict the category. In contrast, an [FCN](http://paperswithcode.com/method/fcn) usually perform per-pixel multi-class categorization, which couples segmentation and classification." - } - methods: { - name: "RoIAlign" - full_name: "RoIAlign" - description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - } - video: { - video_id: "FPLvzxH8geY" - video_title: "PR-184: And the Bit Goes Down: Revisiting the Quantization of Neural Networks" - number_of_likes: 7 - number_of_views: 530 - published_date: { - seconds: 1564926928 - } - uploader: "Young Seok Kim" - } - } -} -pr_id_to_video: { - key: 185 - value: { - papers: { - paper_id: "190500641" - title: "RetinaFace: Single-stage Dense Face Localisation in the Wild" - arxiv_id: "1905.00641" - abstract: "Face Analysis Project on MXNet" - pub_date: { - seconds: 1556755200 - } - authors: "Jiankang Deng" - authors: "Jia Guo" - authors: "Yuxiang Zhou" - authors: "Jinke Yu" - authors: "Irene Kotsia" - authors: "Stefanos Zafeiriou" - repositories: { - url: "https://github.com/vladimirwest/insightface_cinematic" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - repositories: { - url: "https://github.com/iChenning/facedetection" - framework: FRAMEWORK_PYTORCH - description: "face detection,based on retinaface" - } - repositories: { - url: "https://github.com/serengil/retinaface" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 64 - description: "RetinaFace: Deep Face Detection Library in TensorFlow for Python" - } - repositories: { - url: "https://github.com/serengil/deepface" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1690 - description: "A Lightweight Deep Face Recognition and Facial Attribute Analysis (Age, Gender, Emotion and Race) Framework for Python" - } - repositories: { - url: "https://github.com/nickuntitled/censorface-js" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Face Detection in Javascript by ONNX.js" - } - repositories: { - url: "https://github.com/prajinkhadka/face_det_check" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/Johnny952/retinaface_mod" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/SohamSarfare/ADS" - framework: FRAMEWORK_OTHERS - description: "Re-evaluating the results of the paper RetinaFace algorithm using original data along with the original WIDERFACE dataset. " - } - repositories: { - url: "https://github.com/bubbliiiing/retinaface-keras" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 40 - description: "这是一个retinaface-keras的源码,可以用于训练自己的模型。" - } - repositories: { - url: "https://github.com/MatGod7/Retinaface" - framework: FRAMEWORK_PYTORCH - } - } - video: { - video_id: "DkcHEnxkXpM" - video_title: "PR-185: RetinaFace: Single-stage Dense Face Localisation in the Wild" - number_of_likes: 25 - number_of_views: 2395 - published_date: { - seconds: 1570081394 - } - uploader: "visionNoob" - } - } -} -pr_id_to_video: { - key: 186 - value: { - papers: { - paper_id: "arbitrary-style-transfer-in-real-time-with" - title: "Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" - arxiv_id: "1703.06868" - abstract: "Gatys et al. recently introduced a neural algorithm that renders a content\nimage in the style of another image, achieving so-called style transfer.\nHowever, their framework requires a slow iterative optimization process, which\nlimits its practical application. Fast approximations with feed-forward neural\nnetworks have been proposed to speed up neural style transfer. 
Unfortunately,\nthe speed improvement comes at a cost: the network is usually tied to a fixed\nset of styles and cannot adapt to arbitrary new styles. In this paper, we\npresent a simple yet effective approach that for the first time enables\narbitrary style transfer in real-time. At the heart of our method is a novel\nadaptive instance normalization (AdaIN) layer that aligns the mean and variance\nof the content features with those of the style features. Our method achieves\nspeed comparable to the fastest existing approach, without the restriction to a\npre-defined set of styles. In addition, our approach allows flexible user\ncontrols such as content-style trade-off, style interpolation, color & spatial\ncontrols, all using a single feed-forward neural network." - pub_date: { - seconds: 1489968000 - } - authors: "Xun Huang" - authors: "Serge Belongie" - repositories: { - url: "https://github.com/KaiyangZhou/ssdg-benchmark" - framework: FRAMEWORK_PYTORCH - number_of_stars: 23 - description: "Benchmarks for semi-supervised domain generalization." - } - repositories: { - url: "https://github.com/KaiyangZhou/mixstyle-release" - framework: FRAMEWORK_PYTORCH - number_of_stars: 87 - description: "Domain Generalization with MixStyle. ICLR'21." - } - repositories: { - url: "https://github.com/PacktPublishing/Hands-On-Image-Generation-with-TensorFlow-2.0/tree/master/Chapter05" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 62 - description: "Hands-On Image Generation with TensorFlow 2.0, published by Packt" - } - repositories: { - url: "https://github.com/srihari-humbarwadi/adain-tensorflow2.x" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "TensorFlow2.x implementation of Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" - } - repositories: { - url: "https://github.com/krisrjohnson/Realistic-Neural-Talking-Head-Models" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/JeongsolKim/BiS400_term_project" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/Jwrede/neural_style_transfer" - framework: FRAMEWORK_PYTORCH - description: "Pytorch implementation of the paper Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" - } - repositories: { - url: "https://github.com/times2049/talkinghead" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/ptran1203/style_transfer" - framework: FRAMEWORK_TENSORFLOW - description: "Arbitrary Style Transfer With Adaptive Instance Normalization" - } - repositories: { - url: "https://github.com/Yijunmaverick/UniversalStyleTransfer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 521 - description: "The source code of NIPS17 'Universal Style Transfer via Feature Transforms'." - } - methods: { - name: "Style Transfer Module" - full_name: "Style Transfer Module" - description: "Modules used in GAN's style transfer." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Adaptive Instance Normalization" - full_name: "Adaptive Instance Normalization" - description: "**Adaptive Instance Normalization** is a normalization method that aligns the mean and variance of the content features with those of the style features. \r\n\r\n[Instance Normalization](https://paperswithcode.com/method/instance-normalization) normalizes the input to a single style specified by the affine parameters. Adaptive Instance Normaliation is an extension. In AdaIN, we receive a content input $x$ and a style input $y$, and we simply align the channel-wise mean and variance of $x$ to match those of $y$. Unlike [Batch Normalization](https://paperswithcode.com/method/batch-normalization), Instance Normalization or Conditional Instance Normalization, AdaIN has no learnable affine parameters. Instead, it adaptively computes the affine parameters from the style input:\r\n\r\n$$\r\n\\textrm{AdaIN}(x, y)= \\sigma(y)\\left(\\frac{x-\\mu(x)}{\\sigma(x)}\\right)+\\mu(y)\r\n$$" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "VGG" - full_name: "VGG" - description: "**VGG** is a classical convolutional neural network architecture. It was based on an analysis of how to increase the depth of such networks. The network utilises small 3 x 3 filters. Otherwise the network is characterized by its simplicity: the only other components being pooling layers and a fully connected layer.\r\n\r\nImage: [Davi Frossard](https://www.cs.toronto.edu/frossard/post/vgg16/)" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - } - video: { - video_id: "16BGnsIyh6M" - video_title: "PR-186: Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" - number_of_likes: 23 - number_of_views: 821 - published_date: { - seconds: 1565608448 - } - uploader: "Soyeon Kim" - } - } -} -pr_id_to_video: { - key: 187 - value: { - papers: { - paper_id: "morphnet-fast-simple-resource-constrained" - title: "MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks" - arxiv_id: "1711.06798" - abstract: "We present MorphNet, an approach to automate the design of neural network\nstructures. MorphNet iteratively shrinks and expands a network, shrinking via a\nresource-weighted sparsifying regularizer on activations and expanding via a\nuniform multiplicative factor on all layers. In contrast to previous\napproaches, our method is scalable to large networks, adaptable to specific\nresource constraints (e.g. 
the number of floating-point operations per\ninference), and capable of increasing the network's performance. When applied\nto standard network architectures on a wide variety of datasets, our approach\ndiscovers novel structures in each domain, obtaining higher performance while\nrespecting the resource constraint." - pub_date: { - seconds: 1510963200 - } - authors: "Ariel Gordon" - authors: "Elad Eban" - authors: "Ofir Nachum" - authors: "Bo Chen" - authors: "Hao Wu" - authors: "Tien-Ju Yang" - authors: "Edward Choi" - repositories: { - url: "https://github.com/google-research/morph-net" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 961 - description: "Fast & Simple Resource-Constrained Learning of Deep Network Structure" - } - repositories: { - url: "https://github.com/tensorflow/models" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70333 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/NatGr/Master_Thesis" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 - description: "Repo for my Master Thesis at ULiège in 2019 (Machine learning under resource constraints)" - } - } - video: { - video_id: "vUNAJsO2G98" - video_title: "PR-187 : MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks" - number_of_likes: 4 - number_of_views: 506 - published_date: { - seconds: 1565712056 - } - uploader: "Sunghoon Joo" - } - } -} -pr_id_to_video: { - key: 188 - value: { - papers: { - paper_id: "online-meta-learning" - title: "Online Meta-Learning" - arxiv_id: "1902.08438" - abstract: "A central capability of intelligent systems is the ability to continuously build upon previous experiences to speed up and enhance learning of new tasks. Two distinct research paradigms have studied this question. Meta-learning views this problem as learning a prior over model parameters that is amenable for fast adaptation on a new task, but typically assumes the set of tasks are available together as a batch. In contrast, online (regret based) learning considers a sequential setting in which problems are revealed one after the other, but conventionally train only a single model without any task-specific adaptation. This work introduces an online meta-learning setting, which merges ideas from both the aforementioned paradigms to better capture the spirit and practice of continual lifelong learning. We propose the follow the meta leader algorithm which extends the MAML algorithm to this setting. Theoretically, this work provides an $\\mathcal{O}(\\log T)$ regret guarantee with only one additional higher order smoothness assumption in comparison to the standard online setting. Our experimental evaluation on three different large-scale tasks suggest that the proposed algorithm significantly outperforms alternatives based on traditional online learning approaches." - pub_date: { - seconds: 1550793600 - } - authors: "Chelsea Finn" - authors: "Aravind Rajeswaran" - authors: "Sham Kakade" - authors: "Sergey Levine" - methods: { - name: "MAML" - full_name: "Model-Agnostic Meta-Learning" - description: "**MAML**, or **Model-Agnostic Meta-Learning**, is a model and task-agnostic algorithm for meta-learning that trains a model’s parameters such that a small number of gradient updates will lead to fast learning on a new task.\r\n\r\nConsider a model represented by a parametrized function $f\\_{\\theta}$ with parameters $\\theta$. 
When adapting to a new task $\\mathcal{T}\\_{i}$, the model’s parameters $\\theta$ become $\\theta'\\_{i}$. With MAML, the updated parameter vector $\\theta'\\_{i}$ is computed using one or more gradient descent updates on task $\\mathcal{T}\\_{i}$. For example, when using one gradient update,\r\n\r\n$$ \\theta'\\_{i} = \\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right) $$\r\n\r\nThe step size $\\alpha$ may be fixed as a hyperparameter or metalearned. The model parameters are trained by optimizing for the performance of $f\\_{\\theta'\\_{i}}$ with respect to $\\theta$ across tasks sampled from $p\\left(\\mathcal{T}\\_{i}\\right)$. More concretely the meta-objective is as follows:\r\n\r\n$$ \\min\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right) = \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right)}\\right) $$\r\n\r\nNote that the meta-optimization is performed over the model parameters $\\theta$, whereas the objective is computed using the updated model parameters $\\theta'$. In effect MAML aims to optimize the model parameters such that one or a small number of gradient steps on a new task will produce maximally effective behavior on that task. The meta-optimization across tasks is performed via stochastic gradient descent (SGD), such that the model parameters $\\theta$ are updated as follows:\r\n\r\n$$ \\theta \\leftarrow \\theta - \\beta\\nabla\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right)$$\r\n\r\nwhere $\\beta$ is the meta step size." - } - } - video: { - video_id: "vUNAJsO2G98" - video_title: "PR-187 : MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks" - number_of_likes: 4 - number_of_views: 506 - published_date: { - seconds: 1565712056 - } - uploader: "Sunghoon Joo" - } - } -} -pr_id_to_video: { - key: 189 - value: { - papers: { - paper_id: "unsupervised-data-augmentation-1" - title: "Unsupervised Data Augmentation for Consistency Training" - arxiv_id: "1904.12848" - abstract: "Semi-supervised learning lately has shown much promise in improving deep learning models when labeled data is scarce. Common among recent approaches is the use of consistency training on a large amount of unlabeled data to constrain model predictions to be invariant to input noise. In this work, we present a new perspective on how to effectively noise unlabeled examples and argue that the quality of noising, specifically those produced by advanced data augmentation methods, plays a crucial role in semi-supervised learning. By substituting simple noising operations with advanced data augmentation methods such as RandAugment and back-translation, our method brings substantial improvements across six language and three vision tasks under the same consistency training framework. On the IMDb text classification dataset, with only 20 labeled examples, our method achieves an error rate of 4.20, outperforming the state-of-the-art model trained on 25,000 labeled examples. On a standard semi-supervised learning benchmark, CIFAR-10, our method outperforms all previous approaches and achieves an error rate of 5.43 with only 250 examples. 
Our method also combines well with transfer learning, e.g., when finetuning from BERT, and yields improvements in high-data regime, such as ImageNet, whether when there is only 10% labeled data or when a full labeled set with 1.3M extra unlabeled examples is used. Code is available at https://github.com/google-research/uda." - pub_date: { - seconds: 1556496000 - } - authors: "Qizhe Xie" - authors: "Zihang Dai" - authors: "Eduard Hovy" - authors: "Minh-Thang Luong" - authors: "Quoc V. Le" - repositories: { - url: "https://github.com/kekmodel/UDA-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "An unofficial PyTorch implementation of Unsupervised Data Augmentation" - } - repositories: { - url: "https://github.com/A-Telfer/AugKey" - framework: FRAMEWORK_OTHERS - description: "RandAugment with Keypoints Annotation Support." - } - repositories: { - url: "https://github.com/rwbfd/OpenCompetitionV2" - framework: FRAMEWORK_PYTORCH - number_of_stars: 40 - description: "This is a collection of convenient methods for data science competition." - } - repositories: { - url: "https://github.com/joannayu25/NLP_Project_MIDS-W266" - framework: FRAMEWORK_TENSORFLOW - description: "Final Project for NLP class in UC Berkeley MIDS Program W266" - } - repositories: { - url: "https://github.com/leblancdaniel/paraphraser" - framework: FRAMEWORK_TENSORFLOW - description: "paraphrasing w/ unsupervised data augmentation (source: https://github.com/google-research/uda)" - } - repositories: { - is_official: true - url: "https://github.com/google-research/uda" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1724 - description: "Unsupervised Data Augmentation (UDA)" - } - repositories: { - url: "https://github.com/SanghunYun/UDA_pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 168 - description: "UDA(Unsupervised Data Augmentation) implemented by pytorch" - } - repositories: { - url: "https://github.com/bhacquin/UDA_pytorch" - framework: FRAMEWORK_PYTORCH - description: "Pytorch version of the algorithm described in Unsupervised Data Augmentation. " - } - repositories: { - url: "https://github.com/PhamNguyen97/TSA_pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 8 - description: "Training Signal Annealing" - } - repositories: { - url: "https://github.com/tomgoter/nlp_finalproject" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Repository for Final Project for W266: Natural Language Processing with Deep Learning" - } - methods: { - name: "Layer Normalization" - full_name: "Layer Normalization" - description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. 
More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." - } - methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" - } - methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." - } - methods: { - name: "WordPiece" - full_name: "WordPiece" - description: "**WordPiece** is a subword segmentation algorithm used in natural language processing. 
The vocabulary is initialized with individual characters in the language, then the most frequent combinations of symbols in the vocabulary are iteratively added to the vocabulary. The process is:\r\n\r\n1. Initialize the word unit inventory with all the characters in the text.\r\n2. Build a language model on the training data using the inventory from 1.\r\n3. Generate a new word unit by combining two units out of the current word inventory to increment the word unit inventory by one. Choose the new word unit out of all the possible ones that increases the likelihood on the training data the most when added to the model.\r\n4. Goto 2 until a predefined limit of word units is reached or the likelihood increase falls below a certain threshold.\r\n\r\nText: [Source](https://stackoverflow.com/questions/55382596/how-is-wordpiece-tokenization-helpful-to-effectively-deal-with-rare-words-proble/55416944#55416944)\r\n\r\nImage: WordPiece as used in BERT" - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } - methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." - } - methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). 
The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - } - video: { - video_id: "YiKn93Ud4dA" - video_title: "PR-189: Unsupervised Data Augmentation for Consistency Training" - number_of_likes: 17 - number_of_views: 1174 - published_date: { - seconds: 1566745737 - } - uploader: "박성남" - } - } -} -pr_id_to_video: { - key: 190 - value: { - papers: { - paper_id: "a-baseline-for-detecting-misclassified-and" - title: "A Baseline for Detecting Misclassified and Out-of-Distribution Examples in Neural Networks" - arxiv_id: "1610.02136" - abstract: "We consider the two related problems of detecting if an example is\nmisclassified or out-of-distribution. We present a simple baseline that\nutilizes probabilities from softmax distributions. Correctly classified\nexamples tend to have greater maximum softmax probabilities than erroneously\nclassified and out-of-distribution examples, allowing for their detection. We\nassess performance by defining several tasks in computer vision, natural\nlanguage processing, and automatic speech recognition, showing the\neffectiveness of this baseline across all. We then show the baseline can\nsometimes be surpassed, demonstrating the room for future research on these\nunderexplored detection tasks." 
- pub_date: { - seconds: 1475798400 - } - authors: "Dan Hendrycks" - authors: "Kevin Gimpel" - repositories: { - url: "https://github.com/sooonwoo/RotNet-OOD" - framework: FRAMEWORK_PYTORCH - description: "Self-Supervised Learning for OOD Detection (NeurIPS 2019)" - } - repositories: { - is_official: true - url: "https://github.com/hendrycks/error-detection" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 132 - description: "A Baseline for Detecting Misclassified and Out-of-Distribution Examples in Neural Networks" - } - repositories: { - url: "https://github.com/dabsdamoon/MNIST-Auxiliary-Decoder" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Implemented auxiliary decoder mentioned in the paper 'A Baseline for Detecting Misclassified and Out-of-Distribution Examples in Neural Networks' (https://arxiv.org/abs/1610.02136)" - } - repositories: { - url: "https://github.com/2sang/OOD-baseline" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 - description: "Reproducing experiment result of 'A Baseline for Detecting Misclassified and Out-of-Distribution Examples in Neural Networks', by Hendrycks et al.(2017)" - } - repositories: { - url: "https://github.com/omallo/kaggle-whale" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/guyAmit/GLOD" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "Github for the conference paper GLOD-Gaussian Likelihood OOD detector" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. 
Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." 
- } - } - video: { - video_id: "xaABseUoHAI" - video_title: "PR-190: A Baseline For Detecting Misclassified and Out-of-Distribution Examples In Neural Networks" - number_of_likes: 10 - number_of_views: 1045 - published_date: { - seconds: 1569764236 - } - uploader: "MinGuk Kang" - } - } -} -pr_id_to_video: { - key: 191 - value: { - papers: { - paper_id: "learning-adversarially-fair-and-transferable" - title: "Learning Adversarially Fair and Transferable Representations" - arxiv_id: "1802.06309" - abstract: "In this paper, we advocate for representation learning as the key to\nmitigating unfair prediction outcomes downstream. Motivated by a scenario where\nlearned representations are used by third parties with unknown objectives, we\npropose and explore adversarial representation learning as a natural method of\nensuring those parties act fairly. We connect group fairness (demographic\nparity, equalized odds, and equal opportunity) to different adversarial\nobjectives. Through worst-case theoretical guarantees and experimental\nvalidation, we show that the choice of this objective is crucial to fair\nprediction. Furthermore, we present the first in-depth experimental\ndemonstration of fair transfer learning and demonstrate empirically that our\nlearned representations admit fair predictions on new tasks while maintaining\nutility, an essential goal of fair representation learning." - pub_date: { - seconds: 1518825600 - } - authors: "David Madras" - authors: "Elliot Creager" - authors: "Toniann Pitassi" - authors: "Richard Zemel" - repositories: { - url: "https://github.com/rvr-account/rvr" - framework: FRAMEWORK_OTHERS - description: "Representation via Representations is a project aimed at improving transfer learning to out-of-distribution examples. Motivated by the challenge of finding robust biomedical predictors of disease, the model leverages data from heterogenous sources to discover feature representations that allow for accurate prediction outside of the training data." - } - repositories: { - is_official: true - url: "https://github.com/VectorInstitute/laftr" - framework: FRAMEWORK_OTHERS - number_of_stars: 33 - description: "Learning Adversarially Fair and Transferable Representations" - } - repositories: { - url: "https://github.com/ecreager/laftr" - framework: FRAMEWORK_OTHERS - number_of_stars: 33 - description: "Learning Adversarially Fair and Transferable Representations" - } - } - video: { - video_id: "cgolskL-_WM" - video_title: "PR-191: Learning Adversarially Fair and Transferable Representations" - number_of_views: 379 - published_date: { - seconds: 1567348971 - } - uploader: "Byung-Hak Kim" - } - } -} -pr_id_to_video: { - key: 192 - value: { - papers: { - paper_id: "mocogan-decomposing-motion-and-content-for" - title: "MoCoGAN: Decomposing Motion and Content for Video Generation" - arxiv_id: "1707.04993" - abstract: "Visual signals in a video can be divided into content and motion. While\ncontent specifies which objects are in the video, motion describes their\ndynamics. Based on this prior, we propose the Motion and Content decomposed\nGenerative Adversarial Network (MoCoGAN) framework for video generation. The\nproposed framework generates a video by mapping a sequence of random vectors to\na sequence of video frames. Each random vector consists of a content part and a\nmotion part. While the content part is kept fixed, the motion part is realized\nas a stochastic process. 
To learn motion and content decomposition in an\nunsupervised manner, we introduce a novel adversarial learning scheme utilizing\nboth image and video discriminators. Extensive experimental results on several\nchallenging datasets with qualitative and quantitative comparison to the\nstate-of-the-art approaches, verify effectiveness of the proposed framework. In\naddition, we show that MoCoGAN allows one to generate videos with same content\nbut different motion as well as videos with different content and same motion." - pub_date: { - seconds: 1500249600 - } - authors: "Sergey Tulyakov" - authors: "Ming-Yu Liu" - authors: "Xiaodong Yang" - authors: "Jan Kautz" - repositories: { - url: "https://github.com/ubc-vision/DwNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 19 - } - repositories: { - url: "https://github.com/DLHacks/mocogan" - framework: FRAMEWORK_PYTORCH - number_of_stars: 93 - description: "A pytorch implemention of MoCoGAN" - } - repositories: { - url: "https://github.com/vaibhavsingh9/MoCoGAN_implementation" - framework: FRAMEWORK_PYTORCH - description: "Learning motion GAN's for video based generations" - } - repositories: { - url: "https://github.com/HappyBahman/ldvdGAN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "ldvdGAN, Lower Dimensional Kernels for Video DiscriminatorsLdvdGAN" - } - repositories: { - is_official: true - url: "https://github.com/sergeytulyakov/mocogan" - framework: FRAMEWORK_PYTORCH - number_of_stars: 437 - description: "MoCoGAN: Decomposing Motion and Content for Video Generation" - } - repositories: { - url: "https://github.com/UBC-Computer-Vision-Group/DwNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 19 - } - } - video: { - video_id: "9uNFtnRa_JU" - video_title: "PR-192: MoCoGAN: Decomposing Motion and Content for Video Generation" - number_of_likes: 9 - number_of_views: 1223 - published_date: { - seconds: 1568189938 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 193 - value: { - papers: { - paper_id: "nisp-pruning-networks-using-neuron-importance" - title: "NISP: Pruning Networks using Neuron Importance Score Propagation" - arxiv_id: "1711.05908" - abstract: "To reduce the significant redundancy in deep Convolutional Neural Networks\n(CNNs), most existing methods prune neurons by only considering statistics of\nan individual layer or two consecutive layers (e.g., prune one layer to\nminimize the reconstruction error of the next layer), ignoring the effect of\nerror propagation in deep networks. In contrast, we argue that it is essential\nto prune neurons in the entire neuron network jointly based on a unified goal:\nminimizing the reconstruction error of important responses in the \"final\nresponse layer\" (FRL), which is the second-to-last layer before classification,\nfor a pruned network to retrain its predictive power. Specifically, we apply\nfeature ranking techniques to measure the importance of each neuron in the FRL,\nand formulate network pruning as a binary integer optimization problem and\nderive a closed-form solution to it for pruning neurons in earlier layers.\nBased on our theoretical analysis, we propose the Neuron Importance Score\nPropagation (NISP) algorithm to propagate the importance scores of final\nresponses to every neuron in the network. The CNN is pruned by removing neurons\nwith least importance, and then fine-tuned to retain its predictive power. 
NISP\nis evaluated on several datasets with multiple CNN models and demonstrated to\nachieve significant acceleration and compression with negligible accuracy loss." - pub_date: { - seconds: 1510790400 - } - authors: "Ruichi Yu" - authors: "Ang Li" - authors: "Chun-Fu Chen" - authors: "Jui-Hsin Lai" - authors: "Vlad I. Morariu" - authors: "Xintong Han" - authors: "Mingfei Gao" - authors: "Ching-Yung Lin" - authors: "Larry S. Davis" - } - video: { - video_id: "3KoqN_yYhmI" - video_title: "PR-193: NISP: Pruning Networks using Neural Importance Score Propagation" - number_of_likes: 10 - number_of_views: 566 - published_date: { - seconds: 1567953078 - } - uploader: "taesu" - } - } -} -pr_id_to_video: { - key: 194 - value: { - papers: { - paper_id: "once-for-all-train-one-network-and-specialize" - title: "Once-for-All: Train One Network and Specialize it for Efficient Deployment" - arxiv_id: "1908.09791" - abstract: "We address the challenging problem of efficient inference across many devices and resource constraints, especially on edge devices. Conventional approaches either manually design or use neural architecture search (NAS) to find a specialized neural network and train it from scratch for each case, which is computationally prohibitive (causing $CO_2$ emission as much as 5 cars' lifetime) thus unscalable. In this work, we propose to train a once-for-all (OFA) network that supports diverse architectural settings by decoupling training and search, to reduce the cost. We can quickly get a specialized sub-network by selecting from the OFA network without additional training. To efficiently train OFA networks, we also propose a novel progressive shrinking algorithm, a generalized pruning method that reduces the model size across many more dimensions than pruning (depth, width, kernel size, and resolution). It can obtain a surprisingly large number of sub-networks ($> 10^{19}$) that can fit different hardware platforms and latency constraints while maintaining the same level of accuracy as training independently. On diverse edge devices, OFA consistently outperforms state-of-the-art (SOTA) NAS methods (up to 4.0% ImageNet top1 accuracy improvement over MobileNetV3, or same accuracy but 1.5x faster than MobileNetV3, 2.6x faster than EfficientNet w.r.t measured latency) while reducing many orders of magnitude GPU hours and $CO_2$ emission. In particular, OFA achieves a new SOTA 80.0% ImageNet top-1 accuracy under the mobile setting ($<$600M MACs). OFA is the winning solution for the 3rd Low Power Computer Vision Challenge (LPCVC), DSP classification track and the 4th LPCVC, both classification track and detection track. Code and 50 pre-trained models (for many devices & many latency constraints) are released at https://github.com/mit-han-lab/once-for-all." 
- pub_date: { - seconds: 1566777600 - } - authors: "Han Cai" - authors: "Chuang Gan" - authors: "Tianzhe Wang" - authors: "Zhekai Zhang" - authors: "Song Han" - repositories: { - url: "https://github.com/seulkiyeom/once-for-all" - framework: FRAMEWORK_PYTORCH - description: "Transformable NAS (based on OFA network)" - } - repositories: { - is_official: true - url: "https://github.com/mit-han-lab/once-for-all" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1247 - description: "[ICLR 2020] Once for All: Train One Network and Specialize it for Efficient Deployment" - } - repositories: { - url: "https://github.com/MIT-HAN-LAB/ProxylessNAS" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1240 - description: "[ICLR 2019] ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware" - } - repositories: { - url: "https://github.com/mit-han-lab/ProxylessNAS" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1240 - description: "[ICLR 2019] ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware" - } - repositories: { - url: "https://github.com/mit-han-lab/lpcvc" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 35 - description: "[LPIRC 2019, ICCV 2019] Winner Solution for 4th LPCVC" - } - } - video: {} - } -} -pr_id_to_video: { - key: 195 - value: { - papers: { - paper_id: "mixmatch-a-holistic-approach-to-semi" - title: "MixMatch: A Holistic Approach to Semi-Supervised Learning" - arxiv_id: "1905.02249" - abstract: "Semi-supervised learning has proven to be a powerful paradigm for leveraging unlabeled data to mitigate the reliance on large labeled datasets. In this work, we unify the current dominant approaches for semi-supervised learning to produce a new algorithm, MixMatch, that works by guessing low-entropy labels for data-augmented unlabeled examples and mixing labeled and unlabeled data using MixUp. We show that MixMatch obtains state-of-the-art results by a large margin across many datasets and labeled data amounts. For example, on CIFAR-10 with 250 labels, we reduce error rate by a factor of 4 (from 38% to 11%) and by a factor of 2 on STL-10. We also demonstrate how MixMatch can help achieve a dramatically better accuracy-privacy trade-off for differential privacy. Finally, we perform an ablation study to tease apart which components of MixMatch are most important for its success." - pub_date: { - seconds: 1557100800 - } - authors: "David Berthelot" - authors: "Nicholas Carlini" - authors: "Ian Goodfellow" - authors: "Nicolas Papernot" - authors: "Avital Oliver" - authors: "Colin Raffel" - repositories: { - url: "https://github.com/google-research/crest" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 - description: "Repo for CReST: A Class-Rebalancing Self-Training Framework for Imbalanced Semi-Supervised Learning" - } - repositories: { - url: "https://github.com/narendoraiswamy/MixMatch-pytorch-demo" - framework: FRAMEWORK_PYTORCH - description: "The execution of tests for mixmatch." 
- } - repositories: { - url: "https://github.com/DonghwanKIM0101/CS492I_CV" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/ktran1/Manifold-attack" - framework: FRAMEWORK_PYTORCH - description: "This is an implementation of manifold attack" - } - repositories: { - url: "https://github.com/dhx000/DGM_project" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/ms903-github/MixMatch-imdb" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/yuxi120407/semi-supervised_tensorflow2.0" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "This is an Tensorflow implementation of semi-supervised learning with the following methods: Pseudo-label, Pi_model, VAT, mean_teacher, Mixup, ICT and Mixmatch." - } - repositories: { - url: "https://github.com/rit-git/Snippext_public" - framework: FRAMEWORK_PYTORCH - number_of_stars: 44 - description: "Snippext: Semi-supervised Opinion Mining with Augmented Data" - } - repositories: { - url: "https://github.com/ntozer/mixmatch-tensorflow2.0" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 11 - description: "Implementation of \"MixMatch: A Holistic Approach to Semi-Supervised Learning\" in TensorFlow 2.0" - } - repositories: { - url: "https://github.com/FelixAbrahamsson/mixmatch-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 31 - description: "An implementation of MixMatch with PyTorch" - } - } - video: { - video_id: "ud863JQmUW0" - video_title: "PR-195: MixMatch: A Holistic Approach to Semi-Supervised Learning" - number_of_likes: 21 - number_of_views: 1420 - published_date: { - seconds: 1569160250 - } - uploader: "Jinsung Yoon" - } - } -} -pr_id_to_video: { - key: 196 - value: { - papers: { - paper_id: "stand-alone-self-attention-in-vision-models" - title: "Stand-Alone Self-Attention in Vision Models" - arxiv_id: "1906.05909" - abstract: "Convolutions are a fundamental building block of modern computer vision systems. Recent approaches have argued for going beyond convolutions in order to capture long-range dependencies. These efforts focus on augmenting convolutional models with content-based interactions, such as self-attention and non-local means, to achieve gains on a number of vision tasks. The natural question that arises is whether attention can be a stand-alone primitive for vision models instead of serving as just an augmentation on top of convolutions. In developing and testing a pure self-attention vision model, we verify that self-attention can indeed be an effective stand-alone layer. A simple procedure of replacing all instances of spatial convolutions with a form of self-attention applied to ResNet model produces a fully self-attentional model that outperforms the baseline on ImageNet classification with 12% fewer FLOPS and 29% fewer parameters. On COCO object detection, a pure self-attention model matches the mAP of a baseline RetinaNet while having 39% fewer FLOPS and 34% fewer parameters. Detailed ablation studies demonstrate that self-attention is especially impactful when used in later layers. These results establish that stand-alone self-attention is an important addition to the vision practitioner's toolbox." 
- pub_date: { - seconds: 1560384000 - } - authors: "Prajit Ramachandran" - authors: "Niki Parmar" - authors: "Ashish Vaswani" - authors: "Irwan Bello" - authors: "Anselm Levskaya" - authors: "Jonathon Shlens" - repositories: { - url: "https://github.com/MartinGer/Stand-Alone-Self-Attention-in-Vision-Models" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Pytorch implementation of the paper Stand-Alone Self-Attention in Vision Models" - } - repositories: { - url: "https://github.com/MaheepChaudhary/Stand-Alone_Self-Attention" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Implemented the Stand-Alone Self-Attention research paper form scratch in Tensorflow" - } - repositories: { - url: "https://github.com/The-AI-Summer/self_attention" - framework: FRAMEWORK_PYTORCH - number_of_stars: 449 - description: "Implementation of various self-attention mechanisms focused on computer vision. Ongoing repository. " - } - repositories: { - url: "https://github.com/leaderj1001/Stand-Alone-Self-Attention" - framework: FRAMEWORK_PYTORCH - number_of_stars: 343 - description: "Implementing Stand-Alone Self-Attention in Vision Models using Pytorch" - } - repositories: { - is_official: true - url: "https://github.com/google-research/google-research" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18067 - description: "Google Research" - } - repositories: { - url: "https://github.com/JoeRoussy/adaptive-attention-in-cv" - framework: FRAMEWORK_PYTORCH - number_of_stars: 24 - description: "Implementation for our paper exploring a novel 2D adaptive attention span kernel in computer vision." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. 
Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "Focal Loss" - full_name: "Focal Loss" - description: "A **Focal Loss** function addresses class imbalance during training in tasks like object detection. Focal loss applies a modulating term to the cross entropy loss in order to focus learning on hard negative examples. It is a dynamically scaled cross entropy loss, where the scaling factor decays to zero as confidence in the correct class increases. Intuitively, this scaling factor can automatically down-weight the contribution of easy examples during training and rapidly focus the model on hard examples. \r\n\r\nFormally, the Focal Loss adds a factor $(1 - p\\_{t})^\\gamma$ to the standard cross entropy criterion. Setting $\\gamma>0$ reduces the relative loss for well-classified examples ($p\\_{t}>.5$), putting more focus on hard, misclassified examples. Here there is tunable *focusing* parameter $\\gamma \\ge 0$. \r\n\r\n$$ {\\text{FL}(p\\_{t}) = - (1 - p\\_{t})^\\gamma \\log\\left(p\\_{t}\\right)} $$" - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "RetinaNet" - full_name: "RetinaNet" - description: "**RetinaNet** is a one-stage object detection model that utilizes a focal loss function to address class imbalance during training. Focal loss applies a modulating term to the cross entropy loss in order to focus learning on hard negative examples. RetinaNet is a single, unified network composed of a *backbone* network and two task-specific *subnetworks*. The backbone is responsible for computing a convolutional feature map over an entire input image and is an off-the-self convolutional network. The first subnet performs convolutional object classification on the backbone's output; the second subnet performs convolutional bounding box regression. The two subnetworks feature a simple design that the authors propose specifically for one-stage, dense detection. \r\n\r\nWe can see the motivation for focal loss by comparing with two-stage object detectors. Here class imbalance is addressed by a two-stage cascade and sampling heuristics. 
The proposal stage (e.g., [Selective Search](https://paperswithcode.com/method/selective-search), [EdgeBoxes](https://paperswithcode.com/method/edgeboxes), [DeepMask](https://paperswithcode.com/method/deepmask), [RPN](https://paperswithcode.com/method/rpn)) rapidly narrows down the number of candidate object locations to a small number (e.g., 1-2k), filtering out most background samples. In the second classification stage, sampling heuristics, such as a fixed foreground-to-background ratio, or online hard example mining ([OHEM](https://paperswithcode.com/method/ohem)), are performed to maintain a\r\nmanageable balance between foreground and background.\r\n\r\nIn contrast, a one-stage detector must process a much larger set of candidate object locations regularly sampled across an image. To tackle this, RetinaNet uses a focal loss function, a dynamically scaled cross entropy loss, where the scaling factor decays to zero as confidence in the correct class increases. Intuitively, this scaling factor can automatically down-weight the contribution of easy examples during training and rapidly focus the model on hard examples. \r\n\r\nFormally, the Focal Loss adds a factor $(1 - p\\_{t})^\\gamma$ to the standard cross entropy criterion. Setting $\\gamma>0$ reduces the relative loss for well-classified examples ($p\\_{t}>.5$), putting more focus on hard, misclassified examples. Here there is tunable *focusing* parameter $\\gamma \\ge 0$. \r\n\r\n$$ {\\text{FL}(p\\_{t}) = - (1 - p\\_{t})^\\gamma \\log\\left(p\\_{t}\\right)} $$" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. 
This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "FPN" - full_name: "Feature Pyramid Network" - description: "A **Feature Pyramid Network**, or **FPN**, is a feature extractor that takes a single-scale image of an arbitrary size as input, and outputs proportionally sized feature maps at multiple levels, in a fully convolutional fashion. This process is independent of the backbone convolutional architectures. It therefore acts as a generic solution for building feature pyramids inside deep convolutional networks to be used in tasks like object detection.\r\n\r\nThe construction of the pyramid involves a bottom-up pathway and a top-down pathway.\r\n\r\nThe bottom-up pathway is the feedforward computation of the backbone ConvNet, which computes a feature hierarchy consisting of feature maps at several scales with a scaling step of 2. For the feature\r\npyramid, one pyramid level is defined for each stage. The output of the last layer of each stage is used as a reference set of feature maps. For [ResNets](https://paperswithcode.com/method/resnet) we use the feature activations output by each stage’s last residual block. \r\n\r\nThe top-down pathway hallucinates higher resolution features by upsampling spatially coarser, but semantically stronger, feature maps from higher pyramid levels. These features are then enhanced with features from the bottom-up pathway via lateral connections. Each lateral connection merges feature maps of the same spatial size from the bottom-up pathway and the top-down pathway. The bottom-up feature map is of lower-level semantics, but its activations are more accurately localized as it was subsampled fewer times." - } - } - video: { - video_id: "6hadVw4Sy2M" - video_title: "PR-196: Stand Alone Self Attention in Vision Models" - number_of_likes: 9 - number_of_views: 1557 - published_date: { - seconds: 1571072079 - } - uploader: "정지원" - } - } -} -pr_id_to_video: { - key: 197 - value: { - papers: { - paper_id: "one-ticket-to-win-them-all-generalizing" - title: "One ticket to win them all: generalizing lottery ticket initializations across datasets and optimizers" - arxiv_id: "1906.02773" - abstract: "The success of lottery ticket initializations (Frankle and Carbin, 2019) suggests that small, sparsified networks can be trained so long as the network is initialized appropriately. Unfortunately, finding these \"winning ticket\" initializations is computationally expensive. One potential solution is to reuse the same winning tickets across a variety of datasets and optimizers. However, the generality of winning ticket initializations remains unclear. Here, we attempt to answer this question by generating winning tickets for one training configuration (optimizer and dataset) and evaluating their performance on another configuration. 
Perhaps surprisingly, we found that, within the natural images domain, winning ticket initializations generalized across a variety of datasets, including Fashion MNIST, SVHN, CIFAR-10/100, ImageNet, and Places365, often achieving performance close to that of winning tickets generated on the same dataset. Moreover, winning tickets generated using larger datasets consistently transferred better than those generated using smaller datasets. We also found that winning ticket initializations generalize across optimizers with high performance. These results suggest that winning ticket initializations generated by sufficiently large datasets contain inductive biases generic to neural networks more broadly which improve training across many settings and provide hope for the development of better initialization methods." - pub_date: { - seconds: 1559779200 - } - authors: "Ari S. Morcos" - authors: "Haonan Yu" - authors: "Michela Paganini" - authors: "Yuandong Tian" - repositories: { - url: "https://github.com/varungohil/Generalizing-Lottery-Tickets" - framework: FRAMEWORK_PYTORCH - number_of_stars: 42 - description: "This repository contains code to replicate the experiments given in NeurIPS 2019 paper \"One ticket to win them all: generalizing lottery ticket initializations across datasets and optimizers\"" - } - } - video: { - video_id: "YmTNpF2OOjA" - video_title: "PR-197: One ticket to win them all: generalizing lottery ticket initialization" - number_of_likes: 21 - number_of_views: 1013 - published_date: { - seconds: 1569769625 - } - uploader: "JinWon Lee" - } - } -} -pr_id_to_video: { - key: 198 - value: { - papers: { - paper_id: "temporal-shift-module-for-efficient-video" - title: "TSM: Temporal Shift Module for Efficient Video Understanding" - arxiv_id: "1811.08383" - abstract: "The explosive growth in video streaming gives rise to challenges on performing video understanding at high accuracy and low computation cost. Conventional 2D CNNs are computationally cheap but cannot capture temporal relationships; 3D CNN based methods can achieve good performance but are computationally intensive, making it expensive to deploy. In this paper, we propose a generic and effective Temporal Shift Module (TSM) that enjoys both high efficiency and high performance. Specifically, it can achieve the performance of 3D CNN but maintain 2D CNN's complexity. TSM shifts part of the channels along the temporal dimension; thus facilitate information exchanged among neighboring frames. It can be inserted into 2D CNNs to achieve temporal modeling at zero computation and zero parameters. We also extended TSM to online setting, which enables real-time low-latency online video recognition and video object detection. TSM is accurate and efficient: it ranks the first place on the Something-Something leaderboard upon publication; on Jetson Nano and Galaxy Note8, it achieves a low latency of 13ms and 35ms for online video recognition. The code is available at: https://github.com/mit-han-lab/temporal-shift-module." 
- pub_date: { - seconds: 1542672000 - } - authors: "Ji Lin" - authors: "Chuang Gan" - authors: "Song Han" - repositories: { - url: "https://github.com/open-mmlab/mmaction2" - framework: FRAMEWORK_PYTORCH - number_of_stars: 938 - description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" - } - repositories: { - url: "https://github.com/rijuldhir/TSM" - framework: FRAMEWORK_PYTORCH - } - repositories: { - is_official: true - url: "https://github.com/MIT-HAN-LAB/temporal-shift-module" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1381 - description: "[ICCV 2019] TSM: Temporal Shift Module for Efficient Video Understanding" - } - repositories: { - url: "https://github.com/WavesUR/embedded_TSM" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "cs231n project" - } - repositories: { - url: "https://github.com/PaParaZz1/TemporalShiftModule" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "pytorch implementation for TemporalShiftModule" - } - repositories: { - url: "https://github.com/niveditarahurkar/CS231N-ActionRecognition" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Course Project for Stanford CS231n Convolutional Neural Networks for Visual Recognition" - } - } - video: {} - } -} -pr_id_to_video: { - key: 199 - value: { - papers: { - paper_id: "sniper-efficient-multi-scale-training" - title: "SNIPER: Efficient Multi-Scale Training" - arxiv_id: "1805.09300" - abstract: "We present SNIPER, an algorithm for performing efficient multi-scale training\nin instance level visual recognition tasks. Instead of processing every pixel\nin an image pyramid, SNIPER processes context regions around ground-truth\ninstances (referred to as chips) at the appropriate scale. For background\nsampling, these context-regions are generated using proposals extracted from a\nregion proposal network trained with a short learning schedule. Hence, the\nnumber of chips generated per image during training adaptively changes based on\nthe scene complexity. SNIPER only processes 30% more pixels compared to the\ncommonly used single scale training at 800x1333 pixels on the COCO dataset.\nBut, it also observes samples from extreme resolutions of the image pyramid,\nlike 1400x2000 pixels. As SNIPER operates on resampled low resolution chips\n(512x512 pixels), it can have a batch size as large as 20 on a single GPU even\nwith a ResNet-101 backbone. Therefore it can benefit from batch-normalization\nduring training without the need for synchronizing batch-normalization\nstatistics across GPUs. SNIPER brings training of instance level recognition\ntasks like object detection closer to the protocol for image classification and\nsuggests that the commonly accepted guideline that it is important to train on\nhigh resolution images for instance level visual recognition tasks might not be\ncorrect. Our implementation based on Faster-RCNN with a ResNet-101 backbone\nobtains an mAP of 47.6% on the COCO dataset for bounding box detection and can\nprocess 5 images per second during inference with a single GPU. Code is\navailable at https://github.com/MahyarNajibi/SNIPER/." - pub_date: { - seconds: 1527033600 - } - authors: "Bharat Singh" - authors: "Mahyar Najibi" - authors: "Larry S. Davis" - repositories: { - url: "https://github.com/starimpact/arm_SNIPER" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 - description: "sniper version for arm tiny input and network training." 
- } - repositories: { - url: "https://github.com/Hwang64/PSIS" - framework: FRAMEWORK_OTHERS - number_of_stars: 75 - description: "Data Augmentation for Object Detection via Progressive and Selective Instance-Switching" - } - repositories: { - is_official: true - url: "https://github.com/MahyarNajibi/SNIPER" - framework: FRAMEWORK_OTHERS - number_of_stars: 2643 - description: "SNIPER / AutoFocus is an efficient multi-scale object detection training / inference algorithm" - } - methods: { - name: "ResNet" - full_name: "Residual Network" - description: "**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. Instead of hoping each few stacked layers directly fit a desired underlying mapping, residual nets let these layers fit a residual mapping. They stack [residual blocks](https://paperswithcode.com/method/residual-block) ontop of each other to form network: e.g. a ResNet-50 has fifty layers using these blocks. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}(x)$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}(x):=\\mathcal{H}(x)-x$. The original mapping is recast into $\\mathcal{F}(x)+x$.\r\n\r\nThere is empirical evidence that these types of network are easier to optimize, and can gain accuracy from considerably increased depth." - } - methods: { - name: "RPN" - full_name: "Region Proposal Network" - description: "A **Region Proposal Network**, or **RPN**, is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals. RPN and algorithms like Fast R-CNN can be merged into a single network by sharing their convolutional features - using the recently popular terminology of neural networks with attention mechanisms, the RPN component tells the unified network where to look.\r\n\r\nRPNs are designed to efficiently predict region proposals with a wide range of scales and aspect ratios. RPNs use anchor boxes that serve as references at multiple scales and aspect ratios. The scheme can be thought of as a pyramid of regression references, which avoids enumerating images or filters of multiple scales or aspect ratios." - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. 
Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "RoIPool" - full_name: "RoIPool" - description: "**Region of Interest Pooling**, or **RoIPool**, is an operation for extracting a small feature map (e.g., $7×7$) from each RoI in detection and segmentation based tasks. Features are extracted from each candidate box, and thereafter in models like Fast R-CNN, are then classified and bounding box regression performed.\r\n\r\nThe actual scaling to, e.g., $7×7$, occurs by dividing the region proposal into equally sized sections, finding the largest value in each section, and then copying these max values to the output buffer. In essence, **RoIPool** is max pooling on a discrete grid based on a box.\r\n\r\nImage Source: [Joyce Xu](https://towardsdatascience.com/deep-learning-for-object-detection-a-comprehensive-review-73930816d8d9)" - } - methods: { - name: "Faster R-CNN" - full_name: "Faster R-CNN" - description: "**Faster R-CNN** is an object detection model that improves on [Fast R-CNN](https://paperswithcode.com/method/fast-r-cnn) by utilising a region proposal network ([RPN](https://paperswithcode.com/method/rpn)) with the CNN model. The RPN shares full-image convolutional features with the detection network, enabling nearly cost-free region proposals. It is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals, which are used by [Fast R-CNN](https://paperswithcode.com/method/fast-r-cnn) for detection. RPN and Fast R-CNN are merged into a single network by sharing their convolutional features: the RPN component tells the unified network where to look.\r\n\r\nAs a whole, Faster R-CNN consists of two modules. The first module is a deep fully convolutional network that proposes regions, and the second module is the Fast R-CNN detector that uses the proposed regions." - } - methods: { - name: "SNIPER" - full_name: "SNIPER" - description: "**SNIPER** is a multi-scale training approach for instance-level recognition tasks like object detection and instance-level segmentation. Instead of processing all pixels in an image pyramid, SNIPER selectively processes context regions around the ground-truth objects (a.k.a chips). This can help to speed up multi-scale training as it operates on low-resolution chips. Due to its memory-efficient design, SNIPER can benefit from Batch Normalization during training and it makes larger batch-sizes possible for instance-level recognition tasks on a single GPU." - } - methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. 
The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - } - video: { - video_id: "EkndN7svgUk" - video_title: "PR-199: SNIPER:Efficient Multi Scale Training" - number_of_likes: 16 - number_of_views: 1384 - published_date: { - seconds: 1570377571 - } - uploader: "visionNoob" - } - } -} -pr_id_to_video: { - key: 200 - value: { - papers: { - paper_id: "online-model-distillation-for-efficient-video" - title: "Online Model Distillation for Efficient Video Inference" - arxiv_id: "1812.02699" - abstract: "High-quality computer vision models typically address the problem of understanding the general distribution of real-world images. However, most cameras observe only a very small fraction of this distribution. This offers the possibility of achieving more efficient inference by specializing compact, low-cost models to the specific distribution of frames observed by a single camera. In this paper, we employ the technique of model distillation (supervising a low-cost student model using the output of a high-cost teacher) to specialize accurate, low-cost semantic segmentation models to a target video stream. Rather than learn a specialized student model on offline data from the video stream, we train the student in an online fashion on the live video, intermittently running the teacher to provide a target for learning. Online model distillation yields semantic segmentation models that closely approximate their Mask R-CNN teacher with 7 to 17$\\times$ lower inference runtime cost (11 to 26$\\times$ in FLOPs), even when the target video's distribution is non-stationary. 
Our method requires no offline pretraining on the target video stream, achieves higher accuracy and lower cost than solutions based on flow or video object segmentation, and can exhibit better temporal stability than the original teacher. We also provide a new video dataset for evaluating the efficiency of inference over long running video streams." - pub_date: { - seconds: 1544054400 - } - authors: "Ravi Teja Mullapudi" - authors: "Steven Chen" - authors: "Keyi Zhang" - authors: "Deva Ramanan" - authors: "Kayvon Fatahalian" - repositories: { - url: "https://github.com/josephch405/jit-masker" - framework: FRAMEWORK_PYTORCH - number_of_stars: 18 - } - methods: { - name: "Mask R-CNN" - full_name: "Mask R-CNN" - description: "**Mask R-CNN** extends [Faster R-CNN](http://paperswithcode.com/method/faster-r-cnn) to solve instance segmentation tasks. It achieves this by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. In principle, Mask R-CNN is an intuitive extension of Faster R-CNN, but constructing the mask branch properly is critical for good results. \r\n\r\nMost importantly, Faster R-CNN was not designed for pixel-to-pixel alignment between network inputs and outputs. This is evident in how [RoIPool](http://paperswithcode.com/method/roi-pooling), the *de facto* core operation for attending to instances, performs coarse spatial quantization for feature extraction. To fix the misalignment, Mask R-CNN utilises a simple, quantization-free layer, called [RoIAlign](http://paperswithcode.com/method/roi-align), that faithfully preserves exact spatial locations. \r\n\r\nSecondly, Mask R-CNN *decouples* mask and class prediction: it predicts a binary mask for each class independently, without competition among classes, and relies on the network's RoI classification branch to predict the category. In contrast, an [FCN](http://paperswithcode.com/method/fcn) usually perform per-pixel multi-class categorization, which couples segmentation and classification." - } - methods: { - name: "RoIAlign" - full_name: "RoIAlign" - description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. 
It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - } - video: { - video_id: "BHEncY-f548" - video_title: "PR-200: Online Model Distillation for Efficient Video Inference" - number_of_likes: 16 - number_of_views: 714 - published_date: { - seconds: 1571035103 - } - uploader: "Soyeon Kim" - } - } -} -pr_id_to_video: { - key: 201 - value: { - papers: { - paper_id: "bag-of-tricks-for-image-classification-with" - title: "Bag of Tricks for Image Classification with Convolutional Neural Networks" - arxiv_id: "1812.01187" - abstract: "Much of the recent progress made in image classification research can be\ncredited to training procedure refinements, such as changes in data\naugmentations and optimization methods. In the literature, however, most\nrefinements are either briefly mentioned as implementation details or only\nvisible in source code. In this paper, we will examine a collection of such\nrefinements and empirically evaluate their impact on the final model accuracy\nthrough ablation study. We will show that, by combining these refinements\ntogether, we are able to improve various CNN models significantly. For example,\nwe raise ResNet-50's top-1 validation accuracy from 75.3% to 79.29% on\nImageNet. We will also demonstrate that improvement on image classification\naccuracy leads to better transfer learning performance in other application\ndomains such as object detection and semantic segmentation." - pub_date: { - seconds: 1543881600 - } - authors: "Tong He" - authors: "Zhi Zhang" - authors: "Hang Zhang" - authors: "Zhongyue Zhang" - authors: "Junyuan Xie" - authors: "Mu Li" - repositories: { - url: "https://github.com/Tirth27/Skin-Cancer-Classification-using-Deep-Learning" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Classify Skin cancer from the skin lesion images using Image classification. The dataset for the project is obtained from the Kaggle SIIM-ISIC-Melanoma-Classification competition. " - } - repositories: { - url: "https://github.com/Media-Smart/vedaseg" - framework: FRAMEWORK_PYTORCH - number_of_stars: 382 - description: "A semantic segmentation toolbox based on PyTorch" - } - repositories: { - url: "https://github.com/seermer/TensorFlow2-EfficientNetV2" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "a TensorFlow2(keras model) implementation of EfficientNetV2" - } - repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11022 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" - } - repositories: { - url: "https://github.com/qingyuanchen1997/Bag-of-Tricks" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "The reproduction of paper \"Bag of Tricks for Image Classification with Convolutional Neural Networks\" (based on Pyorch)" - } - repositories: { - url: "https://github.com/jameswang287/Car-Detection" - framework: FRAMEWORK_PYTORCH - description: "Using the Stanford cars dataset and PyTorch/Resnet-34 to predict a car's make and model." 
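The Softmax entry above gives the mapping P(y=j|x) = e^{x^T w_j} / sum_k e^{x^T w_k}. As a minimal NumPy sketch of that formula (the example logits and the max-subtraction trick for numerical stability are illustrative choices, not part of the record):

import numpy as np

def softmax(z):
    # Subtract the max before exponentiating; the result is unchanged but stays finite.
    e = np.exp(z - np.max(z))
    return e / e.sum()

z = np.array([2.0, 1.0, 0.1])   # example logits x^T w_j for three classes
print(softmax(z))                # roughly [0.66 0.24 0.10], sums to 1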
- } - repositories: { - is_official: true - url: "https://github.com/dmlc/gluon-cv" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4786 - description: "Gluon CV Toolkit" - } - repositories: { - url: "https://github.com/sherdencooper/tricks-in-deeplearning" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18 - description: "Using different tricks to improve performance of resetnet. The final accuracy:95.21%" - } - repositories: { - url: "https://github.com/PaddlePaddle/models" - framework: FRAMEWORK_OTHERS - number_of_stars: 5997 - description: "Pre-trained and Reproduced Deep Learning Models (『飞桨』官方模型库,包含多种学术前沿和工业场景验证的深度学习模型)" - } - repositories: { - url: "https://github.com/Dmitrsl/Tools" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - methods: { - name: "Nesterov Accelerated Gradient" - full_name: "Nesterov Accelerated Gradient" - description: "**Nesterov Accelerated Gradient** is a momentum-based SGD optimizer that \"looks ahead\" to where the parameters will be to calculate the gradient **ex post** rather than **ex ante**:\r\n\r\n$$ v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta-\\gamma{v\\_{t-1}}\\right) $$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} + v\\_{t}$$\r\n\r\nLike SGD with momentum $\\gamma$ is usually set to $0.9$.\r\n\r\nThe intuition is that the [standard momentum](https://paperswithcode.com/method/sgd-with-momentum) method first computes the gradient at the current location and then takes a big jump in the direction of the updated accumulated gradient. In contrast Nesterov momentum first makes a big jump in the direction of the previous accumulated gradient and then measures the gradient where it ends up and makes a correction. The idea being that it is better to correct a mistake after you have made it. \r\n\r\nImage Source: [Geoff Hinton lecture notes](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)" - } - methods: { - name: "Mixup" - full_name: "Mixup" - description: "**Mixup** is a data augmentation technique that that generates a weighted combinations of random image pairs from the training data. Given two images and their ground truth labels: $\\left(x\\_{i}, y\\_{i}\\right), \\left(x\\_{j}, y\\_{j}\\right)$, a synthetic training example $\\left(\\hat{x}, \\hat{y}\\right)$ is generated as:\r\n\r\n$$ \\hat{x} = \\lambda{x\\_{i}} + \\left(1 − \\lambda\\right){x\\_{j}} $$\r\n$$ \\hat{y} = \\lambda{y\\_{i}} + \\left(1 − \\lambda\\right){y\\_{j}} $$\r\n\r\nwhere $\\lambda \\sim \\text{Beta}\\left(\\alpha = 0.2\\right)$ is independently sampled for each augmented example." - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. 
It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Cosine Annealing" - full_name: "Cosine Annealing" - description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\\eta\\_{t} = \\eta\\_{min}^{i} + \\frac{1}{2}\\left(\\eta\\_{max}^{i}-\\eta\\_{min}^{i}\\right)\\left(1+\\cos\\left(\\frac{T\\_{cur}}{T\\_{i}}\\pi\\right)\\right)\r\n$$\r\n\r\nWhere where $\\eta\\_{min}^{i}$ and $ \\eta\\_{max}^{i}$ are ranges for the learning rate, and $T\\_{cur}$ account for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" - } - methods: { - name: "Random Horizontal Flip" - full_name: "Random Horizontal Flip" - description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "Label Smoothing" - full_name: "Label Smoothing" - description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. 
Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "ResNet-D" - full_name: "ResNet-D" - description: "**ResNet-D** is a modification on the [ResNet](https://paperswithcode.com/method/resnet) architecture that utilises an average pooling tweak for downsampling. The motivation is that in the unmodified ResNet, the 1 × 1 convolution for the downsampling block ignores 3/4 of input feature maps, so this is modified so no information will be ignored" - } - } - video: { - video_id: "D-baIgejA4M" - video_title: "PR-201: Bag of Tricks for Image Classification with Convolutional Neural Networks" - number_of_likes: 47 - number_of_views: 8746 - published_date: { - seconds: 1571580127 - } - uploader: "Jiyang Kang" - } - } -} -pr_id_to_video: { - key: 202 - value: { - papers: { - paper_id: "tafe-net-task-aware-feature-embeddings-for" - title: "Deep Mixture of Experts via Shallow Embedding" - arxiv_id: "1806.01531" - abstract: "Larger networks generally have greater representational power at the cost of\nincreased computational complexity. Sparsifying such networks has been an\nactive area of research but has been generally limited to static regularization\nor dynamic approaches using reinforcement learning. We explore a mixture of\nexperts (MoE) approach to deep dynamic routing, which activates certain experts\nin the network on a per-example basis. Our novel DeepMoE architecture increases\nthe representational power of standard convolutional networks by adaptively\nsparsifying and recalibrating channel-wise features in each convolutional\nlayer. We employ a multi-headed sparse gating network to determine the\nselection and scaling of channels for each input, leveraging exponential\ncombinations of experts within a single convolutional network. 
Our proposed\narchitecture is evaluated on four benchmark datasets and tasks, and we show\nthat Deep-MoEs are able to achieve higher accuracy with lower computation than\nstandard convolutional networks." - pub_date: { - seconds: 1528156800 - } - authors: "Xin Wang" - authors: "Fisher Yu" - authors: "Lisa Dunlap" - authors: "Yi-An Ma" - authors: "Ruth Wang" - authors: "Azalia Mirhoseini" - authors: "Trevor Darrell" - authors: "Joseph E. Gonzalez" - } - video: { - video_id: "iR7T3lH20gI" - video_title: "PR-202: Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts" - number_of_likes: 14 - number_of_views: 1043 - published_date: { - seconds: 1571582247 - } - uploader: "박성남" - } - } -} -pr_id_to_video: { - key: 203 - value: { - papers: { - paper_id: "class-balanced-loss-based-on-effective-number" - title: "Class-Balanced Loss Based on Effective Number of Samples" - arxiv_id: "1901.05555" - abstract: "With the rapid increase of large-scale, real-world datasets, it becomes\ncritical to address the problem of long-tailed data distribution (i.e., a few\nclasses account for most of the data, while most classes are\nunder-represented). Existing solutions typically adopt class re-balancing\nstrategies such as re-sampling and re-weighting based on the number of\nobservations for each class. In this work, we argue that as the number of\nsamples increases, the additional benefit of a newly added data point will\ndiminish. We introduce a novel theoretical framework to measure data overlap by\nassociating with each sample a small neighboring region rather than a single\npoint. The effective number of samples is defined as the volume of samples and\ncan be calculated by a simple formula $(1-\\beta^{n})/(1-\\beta)$, where $n$ is\nthe number of samples and $\\beta \\in [0,1)$ is a hyperparameter. We design a\nre-weighting scheme that uses the effective number of samples for each class to\nre-balance the loss, thereby yielding a class-balanced loss. Comprehensive\nexperiments are conducted on artificially induced long-tailed CIFAR datasets\nand large-scale datasets including ImageNet and iNaturalist. Our results show\nthat when trained with the proposed class-balanced loss, the network is able to\nachieve significant performance gains on long-tailed datasets." - pub_date: { - seconds: 1547596800 - } - authors: "Yin Cui" - authors: "Menglin Jia" - authors: "Tsung-Yi Lin" - authors: "Yang Song" - authors: "Serge Belongie" - repositories: { - url: "https://github.com/tiagoCuervo/JapaNet" - framework: FRAMEWORK_TENSORFLOW - description: "Detection and classification of Kuzushiji characters for the Kuzushiji Recognition Kaggle challenge using CenterNet as detector and multiple classifiers" - } - repositories: { - is_official: true - url: "https://github.com/richardaecn/class-balanced-loss" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 447 - description: "Class-Balanced Loss Based on Effective Number of Samples. 
CVPR 2019" - } - repositories: { - url: "https://github.com/frgfm/Holocron" - framework: FRAMEWORK_PYTORCH - number_of_stars: 115 - description: "PyTorch implementations of recent Computer Vision tricks (ReXNet, RepVGG, Unet3p, YOLOv4, CIoU loss, AdaBelief)" - } - repositories: { - url: "https://github.com/vandit15/Class-balanced-loss-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 509 - description: "Pytorch implementation of the paper \"Class-Balanced Loss Based on Effective Number of Samples\"" - } - repositories: { - url: "https://github.com/statsu1990/yoto_class_balanced_loss" - framework: FRAMEWORK_PYTORCH - number_of_stars: 8 - description: "Unofficial implementation of YOTO (You Only Train Once) applied to Class balanced loss" - } - repositories: { - url: "https://github.com/feidfoe/AdjustBnd4Imbalance" - framework: FRAMEWORK_PYTORCH - number_of_stars: 15 - description: "Adjust Decision Boundary for Class Imbalanced Learning" - } - } - video: { - video_id: "3hL0uVtJrXM" - video_title: "PR-203 : Class-Balanced Loss Based on Effective Number of Samples" - number_of_likes: 15 - number_of_views: 1270 - published_date: { - seconds: 1572183724 - } - uploader: "Sunghoon Joo" - } - } -} -pr_id_to_video: { - key: 204 - value: { - papers: { - paper_id: "learning-deep-representations-by-mutual" - title: "Learning deep representations by mutual information estimation and maximization" - arxiv_id: "1808.06670" - abstract: "In this work, we perform unsupervised learning of representations by\nmaximizing mutual information between an input and the output of a deep neural\nnetwork encoder. Importantly, we show that structure matters: incorporating\nknowledge about locality of the input to the objective can greatly influence a\nrepresentation's suitability for downstream tasks. We further control\ncharacteristics of the representation by matching to a prior distribution\nadversarially. Our method, which we call Deep InfoMax (DIM), outperforms a\nnumber of popular unsupervised learning methods and competes with\nfully-supervised learning on several classification tasks. DIM opens new\navenues for unsupervised learning of representations and is an important step\ntowards flexible formulations of representation-learning objectives for\nspecific end-goals." - pub_date: { - seconds: 1534723200 - } - authors: "R Devon Hjelm" - authors: "Alex Fedorov" - authors: "Samuel Lavoie-Marchildon" - authors: "Karan Grewal" - authors: "Phil Bachman" - authors: "Adam Trischler" - authors: "Yoshua Bengio" - repositories: { - url: "https://github.com/jqhoogland/rgpy" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Renormalization Group techniques implemented in python with special emphasis on Machine Learning-inspired methods." 
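The class-balanced loss record above defines the effective number of samples as (1 - beta^n) / (1 - beta) and re-weights each class by its inverse. A minimal NumPy sketch of those per-class weights, under the assumption (not stated in the record) that the weights are normalized to sum to the number of classes:

import numpy as np

def class_balanced_weights(samples_per_class, beta=0.9999):
    # Effective number of samples per class: (1 - beta^n) / (1 - beta).
    effective_num = (1.0 - np.power(beta, samples_per_class)) / (1.0 - beta)
    weights = 1.0 / effective_num
    # Normalize so the weights sum to the number of classes (a common convention).
    return weights * len(samples_per_class) / weights.sum()

counts = np.array([10000, 2000, 50, 5])   # a long-tailed toy distribution
print(class_balanced_weights(counts))      # rare classes receive larger weights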
- } - repositories: { - url: "https://github.com/jtlai0921/infomax" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/HolenYHR/Deepinfo_pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "a pytorch implementation of deepinfo(Learning deep representations by mutual information estimation and maximization)" - } - repositories: { - url: "https://github.com/bojone/infomax" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 118 - description: "extract features by maximizing mutual information" - } - repositories: { - is_official: true - url: "https://github.com/rdevon/DIM" - framework: FRAMEWORK_PYTORCH - number_of_stars: 647 - description: "Deep InfoMax (DIM), or \"Learning Deep Representations by Mutual Information Estimation and Maximization\"" - } - repositories: { - url: "https://github.com/DuaneNielsen/DeepInfomaxPytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 233 - description: "Learning deep representations by mutual information estimation and maximization" - } - repositories: { - url: "https://github.com/createamind/DIM_Commented" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/ifding/simple-Infomax-pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "This is a simple pytorch implementation of Deep-INFOMAX" - } - repositories: { - url: "https://github.com/schzhu/learning-adversarially-robust-representations" - framework: FRAMEWORK_PYTORCH - number_of_stars: 12 - description: "Code for the paper: Learning Adversarially Robust Representations via Worst-Case Mutual Information Maximization (https://arxiv.org/abs/2002.11798)" - } - } - video: { - video_id: "YNicvevmByo" - video_title: "PR-204: Learning deep representations by mutual information estimation and maximization" - number_of_likes: 30 - number_of_views: 2473 - published_date: { - seconds: 1572789342 - } - uploader: "SeongOk Ryu" - } - } -} -pr_id_to_video: { - key: 205 - value: { - papers: { - paper_id: "a-closer-look-at-few-shot-classification-1" - title: "A Closer Look at Few-shot Classification" - arxiv_id: "1904.04232" - abstract: "Few-shot classification aims to learn a classifier to recognize unseen classes during training with limited labeled examples. While significant progress has been made, the growing complexity of network designs, meta-learning algorithms, and differences in implementation details make a fair comparison difficult. In this paper, we present 1) a consistent comparative analysis of several representative few-shot classification algorithms, with results showing that deeper backbones significantly reduce the performance differences among methods on datasets with limited domain differences, 2) a modified baseline method that surprisingly achieves competitive performance when compared with the state-of-the-art on both the \\miniI and the CUB datasets, and 3) a new experimental setting for evaluating the cross-domain generalization ability for few-shot classification algorithms. Our results reveal that reducing intra-class variation is an important factor when the feature backbone is shallow, but not as critical when using deeper backbones. In a realistic cross-domain evaluation setting, we show that a baseline method with a standard fine-tuning practice compares favorably against other state-of-the-art few-shot learning algorithms." 
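The few-shot classification abstract above contrasts meta-learning methods with a simple baseline built on a pretrained feature backbone. As one illustrative distance-based baseline in that spirit (not the paper's exact Baseline++; the feature dimension, cosine similarity, and random toy features are assumptions for the example):

import numpy as np

def nearest_centroid_predict(support_feats, support_labels, query_feats):
    # support_feats: (n_support, d) features from a frozen backbone.
    classes = np.unique(support_labels)
    centroids = np.stack([support_feats[support_labels == c].mean(axis=0) for c in classes])
    # Cosine similarity between each query feature and each class centroid.
    q = query_feats / np.linalg.norm(query_feats, axis=1, keepdims=True)
    c = centroids / np.linalg.norm(centroids, axis=1, keepdims=True)
    return classes[np.argmax(q @ c.T, axis=1)]

rng = np.random.default_rng(0)
support = rng.normal(size=(10, 64))          # 5-way 2-shot toy support set
labels = np.repeat(np.arange(5), 2)
query = rng.normal(size=(3, 64))
print(nearest_centroid_predict(support, labels, query))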
- pub_date: { - seconds: 1554681600 - } - authors: "Wei-Yu Chen" - authors: "Yen-Cheng Liu" - authors: "Zsolt Kira" - authors: "Yu-Chiang Frank Wang" - authors: "Jia-Bin Huang" - repositories: { - url: "https://github.com/mikehuisman/revisiting-learned-optimizers" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/yinboc/few-shot-meta-baseline" - framework: FRAMEWORK_PYTORCH - number_of_stars: 300 - description: "A New Meta-Baseline for Few-Shot Learning" - } - repositories: { - url: "https://github.com/cyvius96/few-shot-meta-baseline" - framework: FRAMEWORK_PYTORCH - number_of_stars: 300 - description: "A New Meta-Baseline for Few-Shot Learning" - } - repositories: { - is_official: true - url: "https://github.com/wyharveychen/CloserLookFewShot" - framework: FRAMEWORK_PYTORCH - number_of_stars: 837 - description: "source code to ICLR'19, 'A Closer Look at Few-shot Classification' " - } - } - video: { - video_id: "yyqZ1K5u2_8" - video_title: "PR-205: A Closer Look at Few Shot Classification" - number_of_likes: 26 - number_of_views: 2137 - published_date: { - seconds: 1573496397 - } - uploader: "Taeoh Kim" - } - } -} -pr_id_to_video: { - key: 206 - value: { - papers: { - paper_id: "pointrcnn-3d-object-proposal-generation-and" - title: "PointRCNN: 3D Object Proposal Generation and Detection from Point Cloud" - arxiv_id: "1812.04244" - abstract: "In this paper, we propose PointRCNN for 3D object detection from raw point cloud. The whole framework is composed of two stages: stage-1 for the bottom-up 3D proposal generation and stage-2 for refining proposals in the canonical coordinates to obtain the final detection results. Instead of generating proposals from RGB image or projecting point cloud to bird's view or voxels as previous methods do, our stage-1 sub-network directly generates a small number of high-quality 3D proposals from point cloud in a bottom-up manner via segmenting the point cloud of the whole scene into foreground points and background. The stage-2 sub-network transforms the pooled points of each proposal to canonical coordinates to learn better local spatial features, which is combined with global semantic features of each point learned in stage-1 for accurate box refinement and confidence prediction. Extensive experiments on the 3D detection benchmark of KITTI dataset show that our proposed architecture outperforms state-of-the-art methods with remarkable margins by using only point cloud as input. The code is available at https://github.com/sshaoshuai/PointRCNN." 
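The PointRCNN abstract above describes transforming the pooled points of each 3D proposal into canonical coordinates before box refinement. A minimal NumPy sketch of that kind of transform, translating to the box center and rotating by the negative heading angle about the up axis (the axis convention and argument names are assumptions for the example, not taken from the paper's code):

import numpy as np

def to_canonical(points, box_center, box_yaw):
    # points: (N, 3) xyz coordinates; box_center: (3,); box_yaw: heading about the z axis.
    shifted = points - box_center
    c, s = np.cos(-box_yaw), np.sin(-box_yaw)
    rot = np.array([[c, -s, 0.0],
                    [s,  c, 0.0],
                    [0.0, 0.0, 1.0]])
    return shifted @ rot.T

pts = np.array([[5.0, 2.0, 0.5], [6.0, 2.5, 0.4]])
print(to_canonical(pts, box_center=np.array([5.5, 2.2, 0.0]), box_yaw=np.pi / 6))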
- pub_date: { - seconds: 1544486400 - } - authors: "Shaoshuai Shi" - authors: "Xiaogang Wang" - authors: "Hongsheng Li" - repositories: { - url: "https://github.com/cxy1997/3D_adapt_auto_driving" - framework: FRAMEWORK_PYTORCH - number_of_stars: 51 - } - repositories: { - url: "https://github.com/direcf/pointrcnn_multiclass" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "PointRCNN_multiclass" - } - repositories: { - url: "https://github.com/jskim808/js_pointrcnn" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/carterprice2/Deep_Learning_project" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Modified 3D object detectors [F-ConvNet and PointRCNN] for Car detection on Kitti dataset" - } - repositories: { - is_official: true - url: "https://github.com/sshaoshuai/PointRCNN" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1259 - description: "PointRCNN: 3D Object Proposal Generation and Detection from Point Cloud, CVPR 2019." - } - repositories: { - url: "https://github.com/ModelBunker/PointRCNN-PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "PointRCNN: 3D Object Proposal Generation and Detection from Point Cloud" - } - repositories: { - url: "https://github.com/sshaoshuai/Pointnet2.PyTorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 288 - description: "A faster implementation of PointNet++ based on PyTorch." - } - repositories: { - url: "https://github.com/sshaoshuai/PointCloudDet3D" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1775 - description: "OpenPCDet Toolbox for LiDAR-based 3D Object Detection." - } - repositories: { - url: "https://github.com/open-mmlab/OpenPCDet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1775 - description: "OpenPCDet Toolbox for LiDAR-based 3D Object Detection." - } - } - video: { - video_id: "sFN_EgCsNzM" - video_title: "PR-206: PointRCNN: 3D Object Proposal Generation and Detection from Point Cloud" - number_of_likes: 38 - number_of_views: 2733 - published_date: { - seconds: 1573396201 - } - uploader: "Doyup Lee" - } - } -} -pr_id_to_video: { - key: 207 - value: { - papers: { - paper_id: "yolov3-an-incremental-improvement" - title: "YOLOv3: An Incremental Improvement" - arxiv_id: "1804.02767" - abstract: "We present some updates to YOLO! We made a bunch of little design changes to\nmake it better. We also trained this new network that's pretty swell. It's a\nlittle bigger than last time but more accurate. It's still fast though, don't\nworry. At 320x320 YOLOv3 runs in 22 ms at 28.2 mAP, as accurate as SSD but\nthree times faster. When we look at the old .5 IOU mAP detection metric YOLOv3\nis quite good. It achieves 57.9 mAP@50 in 51 ms on a Titan X, compared to 57.5\nmAP@50 in 198 ms by RetinaNet, similar performance but 3.8x faster. 
As always,\nall the code is online at https://pjreddie.com/yolo/" - pub_date: { - seconds: 1523145600 - } - authors: "Joseph Redmon" - authors: "Ali Farhadi" - repositories: { - url: "https://github.com/DevBruce/YOLOv3-TF2" - framework: FRAMEWORK_TENSORFLOW - description: "YOLOv3 implementation with TensorFlow2" - } - repositories: { - url: "https://github.com/Qengineering/YoloV3-ncnn-Jetson-Nano" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "YoloV3 for Jetson Nano" - } - repositories: { - url: "https://github.com/CRIGIM/darknet" - framework: FRAMEWORK_TENSORFLOW - description: "edited darknet" - } - repositories: { - url: "https://github.com/zgcr/simpleAICV-pytorch-ImageNet-COCO-training" - framework: FRAMEWORK_PYTORCH - number_of_stars: 156 - description: "Training examples and results for ImageNet(ILSVRC2012)/COCO2017/VOC2007+VOC2012 datasets.Include ResNet/DarkNet/RegNet/RetinaNet/FCOS/CenterNet/YOLO series." - } - repositories: { - url: "https://github.com/fredotran/traffic-sign-detector-yolov4" - framework: FRAMEWORK_OTHERS - number_of_stars: 4 - description: "This repository contains my upgraded version of using YoloV4 with OpenCV DNN to detect 4 classes of traffic road signs : traffic lights, speed limit signs, crosswalk and stop signs. " - } - repositories: { - url: "https://github.com/thinhhoang95/helipad-yolo" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/ntcuong777/aicc-lightnet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - repositories: { - url: "https://github.com/nilesh0109/PedestrianTracking" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Yolo-v3 and SORT(kalman filter) based pedestrian detector and tracker" - } - repositories: { - url: "https://github.com/MINED30/Face_Mask_Detection_YOLO" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/albertsokol/yolov3-tf2" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "An implementation of YOLOv3 from scratch in Tensorflow 2.3 " - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "RetinaNet" - full_name: "RetinaNet" - description: "**RetinaNet** is a one-stage object detection model that utilizes a focal loss function to address class imbalance during training. Focal loss applies a modulating term to the cross entropy loss in order to focus learning on hard negative examples. RetinaNet is a single, unified network composed of a *backbone* network and two task-specific *subnetworks*. The backbone is responsible for computing a convolutional feature map over an entire input image and is an off-the-self convolutional network. The first subnet performs convolutional object classification on the backbone's output; the second subnet performs convolutional bounding box regression. 
The two subnetworks feature a simple design that the authors propose specifically for one-stage, dense detection. \r\n\r\nWe can see the motivation for focal loss by comparing with two-stage object detectors. Here class imbalance is addressed by a two-stage cascade and sampling heuristics. The proposal stage (e.g., [Selective Search](https://paperswithcode.com/method/selective-search), [EdgeBoxes](https://paperswithcode.com/method/edgeboxes), [DeepMask](https://paperswithcode.com/method/deepmask), [RPN](https://paperswithcode.com/method/rpn)) rapidly narrows down the number of candidate object locations to a small number (e.g., 1-2k), filtering out most background samples. In the second classification stage, sampling heuristics, such as a fixed foreground-to-background ratio, or online hard example mining ([OHEM](https://paperswithcode.com/method/ohem)), are performed to maintain a\r\nmanageable balance between foreground and background.\r\n\r\nIn contrast, a one-stage detector must process a much larger set of candidate object locations regularly sampled across an image. To tackle this, RetinaNet uses a focal loss function, a dynamically scaled cross entropy loss, where the scaling factor decays to zero as confidence in the correct class increases. Intuitively, this scaling factor can automatically down-weight the contribution of easy examples during training and rapidly focus the model on hard examples. \r\n\r\nFormally, the Focal Loss adds a factor $(1 - p\\_{t})^\\gamma$ to the standard cross entropy criterion. Setting $\\gamma>0$ reduces the relative loss for well-classified examples ($p\\_{t}>.5$), putting more focus on hard, misclassified examples. Here there is tunable *focusing* parameter $\\gamma \\ge 0$. \r\n\r\n$$ {\\text{FL}(p\\_{t}) = - (1 - p\\_{t})^\\gamma \\log\\left(p\\_{t}\\right)} $$" - } - methods: { - name: "YOLOv3" - full_name: "YOLOv3" - description: "**YOLOv3** is a real-time, single-stage object detection model that builds on [YOLOv2](https://paperswithcode.com/method/yolov2) with several improvements. Improvements include the use of a new backbone network, Darknet-53 that utilises residual connections, or in the words of the author, \"those newfangled residual network stuff\", as well as some improvements to the bounding box prediction step, and use of three different scales from which to extract features (similar to an FPN)." - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. 
Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "FPN" - full_name: "Feature Pyramid Network" - description: "A **Feature Pyramid Network**, or **FPN**, is a feature extractor that takes a single-scale image of an arbitrary size as input, and outputs proportionally sized feature maps at multiple levels, in a fully convolutional fashion. This process is independent of the backbone convolutional architectures. It therefore acts as a generic solution for building feature pyramids inside deep convolutional networks to be used in tasks like object detection.\r\n\r\nThe construction of the pyramid involves a bottom-up pathway and a top-down pathway.\r\n\r\nThe bottom-up pathway is the feedforward computation of the backbone ConvNet, which computes a feature hierarchy consisting of feature maps at several scales with a scaling step of 2. For the feature\r\npyramid, one pyramid level is defined for each stage. The output of the last layer of each stage is used as a reference set of feature maps. For [ResNets](https://paperswithcode.com/method/resnet) we use the feature activations output by each stage’s last residual block. \r\n\r\nThe top-down pathway hallucinates higher resolution features by upsampling spatially coarser, but semantically stronger, feature maps from higher pyramid levels. These features are then enhanced with features from the bottom-up pathway via lateral connections. Each lateral connection merges feature maps of the same spatial size from the bottom-up pathway and the top-down pathway. The bottom-up feature map is of lower-level semantics, but its activations are more accurately localized as it was subsampled fewer times." - } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. 
Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Darknet-53" - full_name: "Darknet-53" - description: "**Darknet-53** is a convolutional neural network that acts as a backbone for the [YOLOv3](https://paperswithcode.com/method/yolov3) object detection approach. The improvements upon its predecessor [Darknet-19](https://paperswithcode.com/method/darknet-19) include the use of residual connections, as well as more layers." - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "Fast-YOLOv3" - full_name: "Fast-YOLOv3" - } - } - video: { - video_id: "HMgcvgRrDcA" - video_title: "PR-207: YOLOv3: An Incremental Improvement" - number_of_likes: 116 - number_of_views: 7252 - published_date: { - seconds: 1574001134 - } - uploader: "JinWon Lee" +pr_id_to_video: { + key: 1 + value: { + pr_id: 1 + papers: { + paper_id: "generative-adversarial-networks" + title: "Generative Adversarial Networks" + arxiv_id: "1406.2661" + abstract: "We propose a new framework for estimating generative models via an adversarial process, in which we simultaneously train two models: a generative model G that captures the data distribution, and a discriminative model D that estimates the probability that a sample came from the training data rather than G. The training procedure for G is to maximize the probability of D making a mistake. This framework corresponds to a minimax two-player game. In the space of arbitrary functions G and D, a unique solution exists, with G recovering the training data distribution and D equal to 1/2 everywhere. In the case where G and D are defined by multilayer perceptrons, the entire system can be trained with backpropagation. There is no need for any Markov chains or unrolled approximate inference networks during either training or generation of samples. Experiments demonstrate the potential of the framework through qualitative and quantitative evaluation of the generated samples." + pub_date: { + seconds: 1402358400 + } + authors: "Ian J. 
Goodfellow" + authors: "Jean Pouget-Abadie" + authors: "Mehdi Mirza" + authors: "Bing Xu" + authors: "David Warde-Farley" + authors: "Sherjil Ozair" + authors: "Aaron Courville" + authors: "Yoshua Bengio" + repositories: { + url: "https://github.com/jskDr/keraspp_2021" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/JaryV/CycleGAN_OldYoung" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/asiltureli/gan-in-colab" + framework: FRAMEWORK_PYTORCH + description: "GAN implementations on Google Colab" + } + repositories: { + url: "https://github.com/rohitkuk/AnimeGAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 17 + description: "Generating Anime Images by Implementing Deep Convolutional Generative Adversarial Networks paper " + } + repositories: { + url: "https://github.com/ddehueck/pytorch-GAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "PyTorch implementation of the original GAN paper by Goodfellow et al." + } + repositories: { + url: "https://github.com/roberttwomey/machine-imagination-workshop" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "text to image notebook with CLIP for workshop on Machine Imagination, Spring 2021" + } + repositories: { + url: "https://github.com/MaximeVandegar/Papers-in-100-Lines-of-Code" + framework: FRAMEWORK_PYTORCH + number_of_stars: 11 + description: "Implementation of papers in 100 lines of code." + } + repositories: { + url: "https://github.com/dhrim/andong_2021" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } + repositories: { + url: "https://github.com/lab-ml/annotated_deep_learning_paper_implementations/tree/master/labml_nn/gan/original" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3068 + description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + } + repositories: { + url: "https://github.com/jhKessler/Progressively-Growing-Generative-Adverserial-Network" + framework: FRAMEWORK_PYTORCH + description: "Generative Adverserial Network for Image Generation" + } + methods: { + name: "GAN" + full_name: "Generative Adversarial Network" + description: "A **GAN**, or **Generative Adversarial Network**, is a generative model that simultaneously trains\r\ntwo models: a generative model $G$ that captures the data distribution, and a discriminative model $D$ that estimates the\r\nprobability that a sample came from the training data rather than $G$.\r\n\r\nThe training procedure for $G$ is to maximize the probability of $D$ making\r\na mistake. This framework corresponds to a minimax two-player game. In the\r\nspace of arbitrary functions $G$ and $D$, a unique solution exists, with $G$\r\nrecovering the training data distribution and $D$ equal to $\\frac{1}{2}$\r\neverywhere. In the case where $G$ and $D$ are defined by multilayer perceptrons,\r\nthe entire system can be trained with backpropagation. 
\r\n\r\n(Image Source: [here](http://www.kdnuggets.com/2017/01/generative-adversarial-networks-hot-topic-machine-learning.html))" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + } + video: { + video_id: "L3hz57whyNw" + video_title: "PR-001: Generative adversarial nets by Jaejun Yoo (2017/4/13)" + number_of_likes: 255 + number_of_views: 34431 + published_date: { + seconds: 1492839397 + } + uploader: "Sung Kim" + } + } +} +pr_id_to_video: { + key: 2 + value: { + pr_id: 2 + papers: { + paper_id: "deformable-convolutional-networks" + title: "Deformable Convolutional Networks" + arxiv_id: "1703.06211" + abstract: "Convolutional neural networks (CNNs) are inherently limited to model\ngeometric transformations due to the fixed geometric structures in its building\nmodules. In this work, we introduce two new modules to enhance the\ntransformation modeling capacity of CNNs, namely, deformable convolution and\ndeformable RoI pooling. Both are based on the idea of augmenting the spatial\nsampling locations in the modules with additional offsets and learning the\noffsets from target tasks, without additional supervision. The new modules can\nreadily replace their plain counterparts in existing CNNs and can be easily\ntrained end-to-end by standard back-propagation, giving rise to deformable\nconvolutional networks. Extensive experiments validate the effectiveness of our\napproach on sophisticated vision tasks of object detection and semantic\nsegmentation. The code would be released." + pub_date: { + seconds: 1489708800 + } + authors: "Jifeng Dai" + authors: "Haozhi Qi" + authors: "Yuwen Xiong" + authors: "Yi Li" + authors: "Guodong Zhang" + authors: "Han Hu" + authors: "Yichen Wei" + repositories: { + url: "https://github.com/ximilar-com/xcenternet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 133 + description: "Fast anchor free Object Detection based on CenterNet (Objects As Points) and TTFNet (Training-Time-Friendly Network). Implemented in TensorFlow 2.4+." 
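The GAN entry above describes the minimax game in which D maximizes and G minimizes log D(x) + log(1 - D(G(z))). A minimal NumPy sketch that only evaluates those two objectives for a toy logistic discriminator on 1-D samples (the data distribution, the discriminator parameters, and the "generator" samples are all illustrative assumptions; no training is performed):

import numpy as np

def discriminator(x, w=2.0, b=0.0):
    # A toy logistic discriminator on 1-D samples: D(x) = sigmoid(w * x + b).
    return 1.0 / (1.0 + np.exp(-(w * x + b)))

rng = np.random.default_rng(0)
real = rng.normal(loc=1.0, scale=0.2, size=1000)     # samples from the data distribution
fake = rng.normal(loc=-1.0, scale=0.2, size=1000)    # samples from an (untrained) generator

d_loss = -(np.log(discriminator(real)).mean() + np.log(1.0 - discriminator(fake)).mean())
g_loss = -np.log(discriminator(fake)).mean()          # the non-saturating generator objective
print(d_loss, g_loss)

The non-saturating generator term (maximizing log D(G(z)) instead of minimizing log(1 - D(G(z)))) is the practical variant suggested in the original paper to keep gradients useful early in training.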
+ } + repositories: { + url: "https://github.com/esw0116/DynaVSR" + framework: FRAMEWORK_PYTORCH + number_of_stars: 40 + description: "DynaVSR: Dynamic Adaptive Blind VideoSuper-Resolution" + } + repositories: { + url: "https://github.com/bkvie/Locally-Consistent-Deformable-Convolution" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "Locally Consistent Deformable Convolution as part of deformable flow" + } + repositories: { + url: "https://github.com/zhusiling/EDVR" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + } + repositories: { + url: "https://github.com/TangDL/DCN" + framework: FRAMEWORK_TENSORFLOW + description: "DCN" + } + repositories: { + url: "https://github.com/tianhai123/deform-conv" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/necla-ml/Deformable-ConvNets-py3" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Forked Deformable ConvNets for Python 3" + } + repositories: { + is_official: true + url: "https://github.com/msracver/Deformable-ConvNets" + framework: FRAMEWORK_OTHERS + number_of_stars: 3526 + description: "Deformable Convolutional Networks" + } + repositories: { + url: "https://github.com/NVIDIAAICITYCHALLENGE/AICity_Team6_ISU" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 20 + description: "Source code and code description of Team6_ISU for NVIDIA AICity Challenge 2017 track 1" + } + repositories: { + url: "https://github.com/qilei123/fpn_crop" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. 
Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Deformable RoI Pooling" + full_name: "Deformable RoI Pooling" + description: "**Deformable RoI Pooling** adds an offset to each bin position in the regular bin partition of the RoI Pooling. Similarly, the offsets are learned from the preceding feature maps and the RoIs, enabling adaptive part localization for objects with different shapes." + } + methods: { + name: "ResNet" + full_name: "Residual Network" + description: "**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. Instead of hoping each few stacked layers directly fit a desired underlying mapping, residual nets let these layers fit a residual mapping. They stack [residual blocks](https://paperswithcode.com/method/residual-block) ontop of each other to form network: e.g. a ResNet-50 has fifty layers using these blocks. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}(x)$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}(x):=\\mathcal{H}(x)-x$. The original mapping is recast into $\\mathcal{F}(x)+x$.\r\n\r\nThere is empirical evidence that these types of network are easier to optimize, and can gain accuracy from considerably increased depth." + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. 
It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Deformable Position-Sensitive RoI Pooling" + full_name: "Deformable Position-Sensitive RoI Pooling" + description: "**Deformable Position-Sensitive RoI Pooling** is similar to PS RoI Pooling but it adds an offset to each bin position in the regular bin partition. Offset learning follows the “fully convolutional” spirit. In the top branch, a convolutional layer generates the full spatial resolution offset fields. For each RoI (also for each class), PS RoI pooling is applied on such fields to obtain normalized offsets, which are then transformed to the real offsets, in the same way as in deformable RoI pooling." + } + methods: { + name: "Deformable Convolution" + full_name: "Deformable Convolution" + description: "**Deformable convolutions** add 2D offsets to the regular grid sampling locations in the standard convolution. It enables free form deformation of the sampling grid. The offsets are learned from the preceding feature maps, via additional convolutional layers. Thus, the deformation is conditioned on the input features in a local, dense, and adaptive manner." + } + } + video: { + video_id: "RRwaz0fBQ0Y" + video_title: "PR-002: Deformable Convolutional Networks (2017)" + number_of_likes: 110 + number_of_views: 14406 + published_date: { + seconds: 1492352642 + } + uploader: "Terry TaeWoong Um" + } + } +} +pr_id_to_video: { + key: 3 + value: { + pr_id: 3 + papers: { + paper_id: "learning-phrase-representations-using-rnn" + title: "Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation" + arxiv_id: "1406.1078" + abstract: "In this paper, we propose a novel neural network model called RNN\nEncoder-Decoder that consists of two recurrent neural networks (RNN). One RNN\nencodes a sequence of symbols into a fixed-length vector representation, and\nthe other decodes the representation into another sequence of symbols. The\nencoder and decoder of the proposed model are jointly trained to maximize the\nconditional probability of a target sequence given a source sequence. 
The\nperformance of a statistical machine translation system is empirically found to\nimprove by using the conditional probabilities of phrase pairs computed by the\nRNN Encoder-Decoder as an additional feature in the existing log-linear model.\nQualitatively, we show that the proposed model learns a semantically and\nsyntactically meaningful representation of linguistic phrases." + pub_date: { + seconds: 1401753600 + } + authors: "Kyunghyun Cho" + authors: "Bart van Merrienboer" + authors: "Caglar Gulcehre" + authors: "Dzmitry Bahdanau" + authors: "Fethi Bougares" + authors: "Holger Schwenk" + authors: "Yoshua Bengio" + repositories: { + url: "https://github.com/roomylee/rnn-text-classification-tf" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 115 + description: "Tensorflow Implementation of Recurrent Neural Network (Vanilla, LSTM, GRU) for Text Classification" + } + repositories: { + url: "https://github.com/dewanderelex/LanguageTranslation" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/magahub/songrnn" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/starry91/NMT-Lab" + framework: FRAMEWORK_OTHERS + description: "Implementation of Neural machine translation papers" + } + repositories: { + url: "https://github.com/munir-bd/Korean-POS-Tagger-LSTM" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/trevor-richardson/rnn_zoo" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + description: "This repository tests various recurrent neural network architectures on baseline datasets SeqMNIST and pMNIST." + } + repositories: { + url: "https://github.com/Avmb/lowrank-gru" + framework: FRAMEWORK_OTHERS + number_of_stars: 34 + description: "Gated Recurrent Unit with Low-rank matrix factorization" + } + repositories: { + url: "https://github.com/mp2893/gram" + framework: FRAMEWORK_OTHERS + number_of_stars: 197 + description: "Graph-based Attention Model" + } + repositories: { + url: "https://github.com/farizrahman4u/seq2seq" + framework: FRAMEWORK_OTHERS + number_of_stars: 3077 + description: "Sequence to Sequence Learning with Keras" + } + repositories: { + url: "https://github.com/littleflow3r/Sequence_to_sequence_learning_for_machine_translation" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "Pytorch implementation of several seq2seq models (Machine translation task, Japanese-English)" + } + methods: { + name: "GRU" + full_name: "Gated Recurrent Unit" + description: "A **Gated Recurrent Unit**, or **GRU**, is a type of recurrent neural network. It is similar to an [LSTM](https://paperswithcode.com/method/lstm), but only has two gates - a reset gate and an update gate - and notably lacks an output gate. 
Fewer parameters means GRUs are generally easier/faster to train than their LSTM counterparts.\r\n\r\nImage Source: [here](https://www.google.com/url?sa=i&url=https%3A%2F%2Fcommons.wikimedia.org%2Fwiki%2FFile%3AGated_Recurrent_Unit%2C_type_1.svg&psig=AOvVaw3EmNX8QXC5hvyxeenmJIUn&ust=1590332062671000&source=images&cd=vfe&ved=0CA0QjhxqFwoTCMiev9-eyukCFQAAAAAdAAAAABAR)" + } + } + video: { + video_id: "_Dp8u97_rQ0" + video_title: "PR-003:Learning phrase representations using RNN encoder-decoder for statistical machine translation" + number_of_likes: 34 + number_of_views: 6321 + published_date: { + seconds: 1495764575 + } + uploader: "keun bong Kwak" + } + } +} +pr_id_to_video: { + key: 4 + value: { + pr_id: 4 + papers: { + paper_id: "image-super-resolution-using-deep" + title: "Image Super-Resolution Using Deep Convolutional Networks" + arxiv_id: "1501.00092" + abstract: "We propose a deep learning method for single image super-resolution (SR). Our\nmethod directly learns an end-to-end mapping between the low/high-resolution\nimages. The mapping is represented as a deep convolutional neural network (CNN)\nthat takes the low-resolution image as the input and outputs the\nhigh-resolution one. We further show that traditional sparse-coding-based SR\nmethods can also be viewed as a deep convolutional network. But unlike\ntraditional methods that handle each component separately, our method jointly\noptimizes all layers. Our deep CNN has a lightweight structure, yet\ndemonstrates state-of-the-art restoration quality, and achieves fast speed for\npractical on-line usage. We explore different network structures and parameter\nsettings to achieve trade-offs between performance and speed. Moreover, we\nextend our network to cope with three color channels simultaneously, and show\nbetter overall reconstruction quality." + pub_date: { + seconds: 1419984000 + } + authors: "Chao Dong" + authors: "Chen Change Loy" + authors: "Kaiming He" + authors: "Xiaoou Tang" + repositories: { + url: "https://github.com/aba450/Super-Resolution" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/shreeyashyende/better_img_res_with_SRCNN" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/mukul1093/Image-Super-Resolution" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/jaivanti/Super-Resolution-using-ConvNet" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Reconstructing a high resolution photo-realistic image from its counterpart low resolution image has been a long challenging task in the fraternity of computer vision. This task becomes even more difficult when all you have is a single low resolution image as input to recreate its high resolution image. This can be done using Convolution Neural Networks." 
+ } + repositories: { + url: "https://github.com/Amritha16/ImageResolutionEnhancement" + framework: FRAMEWORK_OTHERS + description: "A python implementation of https://arxiv.org/pdf/1501.00092.pdf" + } + repositories: { + url: "https://github.com/amzamzamzamz/nagadomi-waifu2x" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/vpaliwal1/Deep_learning_SRCNN" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/xgd/waifu2xx" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/Weifeng73/Zero-Shot-Super-resolution" + framework: FRAMEWORK_OTHERS + description: "Computer Vision Course 2019 Final Project in ZJU " + } + repositories: { + url: "https://github.com/ferseiti/reproducibility" + framework: FRAMEWORK_TENSORFLOW + } + } + video: { + video_id: "1jGr_OFyfa0" + video_title: "PR-004: Image Super-Resolution Using Deep Convolutional Networks" + number_of_likes: 64 + number_of_views: 9823 + published_date: { + seconds: 1492956744 + } + uploader: "Taegyun Jeon" + } + } +} +pr_id_to_video: { + key: 5 + value: { + pr_id: 5 + papers: { + paper_id: "playing-atari-with-deep-reinforcement" + title: "Playing Atari with Deep Reinforcement Learning" + arxiv_id: "1312.5602" + abstract: "We present the first deep learning model to successfully learn control\npolicies directly from high-dimensional sensory input using reinforcement\nlearning. The model is a convolutional neural network, trained with a variant\nof Q-learning, whose input is raw pixels and whose output is a value function\nestimating future rewards. We apply our method to seven Atari 2600 games from\nthe Arcade Learning Environment, with no adjustment of the architecture or\nlearning algorithm. We find that it outperforms all previous approaches on six\nof the games and surpasses a human expert on three of them." + pub_date: { + seconds: 1387411200 + } + authors: "Volodymyr Mnih" + authors: "Koray Kavukcuoglu" + authors: "David Silver" + authors: "Alex Graves" + authors: "Ioannis Antonoglou" + authors: "Daan Wierstra" + authors: "Martin Riedmiller" + repositories: { + url: "https://github.com/datamllab/rlcard" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1184 + description: "Reinforcement Learning / AI Bots in Card (Poker) Games - Blackjack, Leduc, Texas, DouDizhu, Mahjong, UNO." + } + repositories: { + url: "https://github.com/TheFebrin/DeepRL-Pong" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Deep Reinforcement Learning bot playing Pong game." + } + repositories: { + url: "https://github.com/rikluost/RL_DQN_Pong" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Tackling Atari 2600 game Pong with Reinforcement Learning by utilizing DQN and TF-Agents" + } + repositories: { + url: "https://github.com/gordicaleksa/pytorch-learn-reinforcement-learning" + framework: FRAMEWORK_PYTORCH + number_of_stars: 70 + description: "A collection of various RL algorithms like policy gradients, DQN and PPO. The goal of this repo will be to make it a go-to resource for learning about RL. How to visualize, debug and solve RL problems. I've additionally included playground.py for learning more about OpenAI gym, etc." + } + repositories: { + url: "https://github.com/Curt-Park/rainbow-is-all-you-need" + framework: FRAMEWORK_OTHERS + number_of_stars: 1015 + description: "Rainbow is all you need! 
A step-by-step tutorial from DQN to Rainbow" + } + repositories: { + url: "https://github.com/epignatelli/human-level-control-through-deep-reinforcement-learning" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "A jax/stax implementation of: Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G. and Petersen, S., 2015. Human-level control through deep reinforcement learning. nature, 518(7540), pp.529-533." + } + repositories: { + url: "https://github.com/rishavb123/MineRL" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/eddynelson/dqn" + framework: FRAMEWORK_TENSORFLOW + description: "Deep Q-Networks Implementation with tensorflow 2.x" + } + repositories: { + url: "https://github.com/ktkachuk/Atari-with-Q-Learning" + framework: FRAMEWORK_TENSORFLOW + description: "This notebook shows and explains the implementation of a Reinforcement Learning agent which plays the Atari game Breakout. The agent was trained with Q-Learning." + } + repositories: { + url: "https://github.com/lab-ml/nn" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3070 + description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "DQN" + full_name: "Deep Q-Network" + description: "A **DQN**, or Deep Q-Network, approximates a state-value function in a [Q-Learning](https://paperswithcode.com/method/q-learning) framework with a neural network. In the Atari Games case, they take in several frames of the game as an input and output state values for each action as an output. \r\n\r\nIt is usually used in conjunction with Experience Replay, for storing the episode steps in memory for off-policy learning, where samples are drawn from the replay memory at random. Additionally, the Q-Network is usually optimized towards a frozen target network that is periodically updated with the latest weights every $k$ steps (where $k$ is a hyperparameter). The latter makes training more stable by preventing short-term oscillations from a moving target. 
The former tackles autocorrelation that would occur from on-line learning, and having a replay memory makes the problem more like a supervised learning problem.\r\n\r\nImage Source: [here](https://www.researchgate.net/publication/319643003_Autonomous_Quadrotor_Landing_using_Deep_Reinforcement_Learning)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Q-Learning" + full_name: "Q-Learning" + description: "**Q-Learning** is an off-policy temporal difference control algorithm:\r\n\r\n$$Q\\left(S\\_{t}, A\\_{t}\\right) \\leftarrow Q\\left(S\\_{t}, A\\_{t}\\right) + \\alpha\\left[R_{t+1} + \\gamma\\max\\_{a}Q\\left(S\\_{t+1}, a\\right) - Q\\left(S\\_{t}, A\\_{t}\\right)\\right] $$\r\n\r\nThe learned action-value function $Q$ directly approximates $q\\_{*}$, the optimal action-value function, independent of the policy being followed.\r\n\r\nSource: Sutton and Barto, Reinforcement Learning, 2nd Edition" + } + methods: { + name: "Epsilon Greedy Exploration" + full_name: "Epsilon Greedy Exploration" + description: "**$\\epsilon$-Greedy Exploration** is an exploration strategy in reinforcement learning that takes an exploratory action with probability $\\epsilon$ and a greedy action with probability $1-\\epsilon$. It tackles the exploration-exploitation tradeoff with reinforcement learning algorithms: the desire to explore the state space with the desire to seek an optimal policy. Despite its simplicity, it is still commonly used as an behaviour policy $\\pi$ in several state-of-the-art reinforcement learning models.\r\n\r\nImage Credit: [Robin van Embden](https://cran.r-project.org/web/packages/contextual/vignettes/sutton_barto.html)" + } + methods: { + name: "Experience Replay" + full_name: "Experience Replay" + description: "**Experience Replay** is a replay memory technique used in reinforcement learning where we store the agent’s experiences at each time-step, $e\\_{t} = \\left(s\\_{t}, a\\_{t}, r\\_{t}, s\\_{t+1}\\right)$ in a data-set $D = e\\_{1}, \\cdots, e\\_{N}$ , pooled over many episodes into a replay memory. We then usually sample the memory randomly for a minibatch of experience, and use this to learn off-policy, as with Deep Q-Networks. 
This tackles the problem of autocorrelation leading to unstable training, by making the problem more like a supervised learning problem.\r\n\r\nImage Credit: [Hands-On Reinforcement Learning with Python, Sudharsan Ravichandiran](https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781788836524)" + } + } + video: { + video_id: "V7_cNTfm2i8" + video_title: "PR-005: Playing Atari with Deep Reinforcement Learning (NIPS 2013 Deep Learning Workshop)" + number_of_likes: 53 + number_of_views: 8186 + published_date: { + seconds: 1494165820 + } + uploader: "Sung Kim" + } + } +} +pr_id_to_video: { + key: 6 + value: { + pr_id: 6 + papers: { + paper_id: "neural-turing-machines" + title: "Neural Turing Machines" + arxiv_id: "1410.5401" + abstract: "We extend the capabilities of neural networks by coupling them to external\nmemory resources, which they can interact with by attentional processes. The\ncombined system is analogous to a Turing Machine or Von Neumann architecture\nbut is differentiable end-to-end, allowing it to be efficiently trained with\ngradient descent. Preliminary results demonstrate that Neural Turing Machines\ncan infer simple algorithms such as copying, sorting, and associative recall\nfrom input and output examples." + pub_date: { + seconds: 1413763200 + } + authors: "Alex Graves" + authors: "Greg Wayne" + authors: "Ivo Danihelka" + repositories: { + url: "https://github.com/dgedon/lightning-ntm" + framework: FRAMEWORK_PYTORCH + description: "PyTorch Lightning implementation of Neural Turing Machine (NTM)." + } + repositories: { + url: "https://github.com/theneuralbeing/ntm" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "A PyTorch Implementation of Neural Turing Machine" + } + repositories: { + url: "https://github.com/mdabagia/NeuralTuringMachine" + framework: FRAMEWORK_PYTORCH + description: "PyTorch implementation of the neural Turing machine architecture" + } + repositories: { + url: "https://github.com/rs9000/Neural-Turing-machine" + framework: FRAMEWORK_PYTORCH + description: "NTM in PyTorch" + } + repositories: { + url: "https://github.com/shanyaanand/ntm" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/camigord/Neural-Turing-Machine" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 22 + description: "TensorFlow implementation of a Neural Turing Machine" + } + repositories: { + url: "https://github.com/loudinthecloud/pytorch-ntm" + framework: FRAMEWORK_PYTORCH + number_of_stars: 468 + description: "Neural Turing Machines (NTM) - PyTorch Implementation" + } + repositories: { + url: "https://github.com/adityagilra/archibrain" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + description: "Synthesize bio-plausible neural networks for cognitive tasks, mimicking brain architecture" + } + repositories: { + url: "https://github.com/MarkPKCollier/NeuralTuringMachine" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 560 + description: "Tensorflow implementation of a Neural Turing Machine" + } + repositories: { + url: "https://github.com/jingweiz/pytorch-dnc" + framework: FRAMEWORK_PYTORCH + number_of_stars: 266 + description: "Neural Turing Machine (NTM) & Differentiable Neural Computer (DNC) with pytorch & visdom" + } + methods: { + name: "Content-based Attention" + full_name: "Content-based Attention" + description: "**Content-based attention** is an attention mechanism based on cosine 
similarity:\r\n\r\n$$f_{att}\\left(\\textbf{h}_{i}, \\textbf{s}\\_{j}\\right) = \\cos\\left[\\textbf{h}\\_{i};\\textbf{s}\\_{j}\\right] $$\r\n\r\nIt was utilised in [Neural Turing Machines](https://paperswithcode.com/method/neural-turing-machine) as part of the Addressing Mechanism.\r\n\r\nWe produce a normalized attention weighting by taking a softmax over these attention alignment scores." + } + methods: { + name: "Tanh Activation" + full_name: "Tanh Activation" + description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" + } + methods: { + name: "LSTM" + full_name: "Long Short-Term Memory" + description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" + } + methods: { + name: "Neural Turing Machine" + full_name: "Neural Turing Machine" + description: "A **Neural Turing Machine** is a working memory neural network model. It couples a neural network architecture with external memory resources. The whole architecture is differentiable end-to-end with gradient descent. The models can infer tasks such as copying, sorting and associative recall.\r\n\r\nA Neural Turing Machine (NTM) architecture contains two basic components: a neural\r\nnetwork controller and a memory bank. The Figure presents a high-level diagram of the NTM\r\narchitecture. Like most neural networks, the controller interacts with the external world via\r\ninput and output vectors. Unlike a standard network, it also interacts with a memory matrix\r\nusing selective read and write operations. By analogy to the Turing machine we refer to the\r\nnetwork outputs that parameterise these operations as “heads.”\r\n\r\nEvery component of the architecture is differentiable. This is achieved by defining 'blurry' read and write operations that interact to a greater or lesser degree with all the elements in memory (rather\r\nthan addressing a single element, as in a normal Turing machine or digital computer). The\r\ndegree of blurriness is determined by an attentional “focus” mechanism that constrains each\r\nread and write operation to interact with a small portion of the memory, while ignoring the\r\nrest. Because interaction with the memory is highly sparse, the NTM is biased towards\r\nstoring data without interference. The memory location brought into attentional focus is\r\ndetermined by specialised outputs emitted by the heads. 
These outputs define a normalised\r\nweighting over the rows in the memory matrix (referred to as memory “locations”). Each\r\nweighting, one per read or write head, defines the degree to which the head reads or writes\r\nat each location. A head can thereby attend sharply to the memory at a single location or\r\nweakly to the memory at many locations" + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + } + video: { + video_id: "2wbDiZCWQtY" + video_title: "PR-006: Neural Turing Machine" + number_of_likes: 41 + number_of_views: 5055 + published_date: { + seconds: 1494447474 + } + uploader: "Suh Kiho" + } + } +} +pr_id_to_video: { + key: 7 + value: { + pr_id: 7 + papers: { + paper_id: "deep-photo-style-transfer" + title: "Deep Photo Style Transfer" + arxiv_id: "1703.07511" + abstract: "This paper introduces a deep-learning approach to photographic style transfer\nthat handles a large variety of image content while faithfully transferring the\nreference style. Our approach builds upon the recent work on painterly transfer\nthat separates style from the content of an image by considering different\nlayers of a neural network. However, as is, this approach is not suitable for\nphotorealistic style transfer. Even when both the input and reference images\nare photographs, the output still exhibits distortions reminiscent of a\npainting. Our contribution is to constrain the transformation from the input to\nthe output to be locally affine in colorspace, and to express this constraint\nas a custom fully differentiable energy term. We show that this approach\nsuccessfully suppresses distortion and yields satisfying photorealistic style\ntransfers in a broad variety of scenarios, including transfer of the time of\nday, weather, season, and artistic edits." + pub_date: { + seconds: 1490140800 + } + authors: "Fujun Luan" + authors: "Sylvain Paris" + authors: "Eli Shechtman" + authors: "Kavita Bala" + repositories: { + url: "https://github.com/YooJiHyeong/SinIR" + framework: FRAMEWORK_PYTORCH + number_of_stars: 26 + description: "Official implementation of \"SinIR: Efficient General Image Manipulation with Single Image Reconstruction\" (ICML 2021)" + } + repositories: { + url: "https://github.com/EvanLi/Github-Ranking" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 457 + description: ":star:Github Ranking:star: Github stars and forks ranking list. Github Top100 stars list of different languages. Automatically update daily. | Github仓库排名,每日自动更新" + } + repositories: { + url: "https://github.com/EvanLi/github-most-stars-forks" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 457 + description: ":star:Github Ranking:star: Github stars and forks ranking list. Github Top100 stars list of different languages. Automatically update daily. 
| Github仓库排名,每日自动更新" + } + repositories: { + url: "https://github.com/LouieYang/deep-photo-styletransfer-tf" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 772 + description: "Tensorflow (Python API) implementation of Deep Photo Style Transfer" + } + repositories: { + url: "https://github.com/fatihky/starred" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + } + repositories: { + url: "https://github.com/alexanderivanov2424/CSCI-1430-Final-Project" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/ucsd-dsc-arts/dsc160-final-dsc160_final_group4" + framework: FRAMEWORK_TENSORFLOW + description: "dsc160-final-dsc160_final_group4 created by GitHub Classroom" + } + repositories: { + url: "https://github.com/ritesh2212/DeepPhotoStyle_pytorch-master" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/johnsun03/myTest" + framework: FRAMEWORK_OTHERS + description: "one test" + } + repositories: { + url: "https://github.com/muriloime/awesome-stars" + framework: FRAMEWORK_TENSORFLOW + } + } + video: { + video_id: "YF6nLVDlznE" + video_title: "PR-007: Deep Photo Style Transfer" + number_of_likes: 29 + number_of_views: 5720 + published_date: { + seconds: 1494826006 + } + uploader: "Sung Kim" + } + } +} +pr_id_to_video: { + key: 8 + value: { + pr_id: 8 + papers: { + paper_id: "reverse-classification-accuracy-predicting" + title: "Reverse Classification Accuracy: Predicting Segmentation Performance in the Absence of Ground Truth" + arxiv_id: "1702.03407" + abstract: "When integrating computational tools such as automatic segmentation into\nclinical practice, it is of utmost importance to be able to assess the level of\naccuracy on new data, and in particular, to detect when an automatic method\nfails. However, this is difficult to achieve due to absence of ground truth.\nSegmentation accuracy on clinical data might be different from what is found\nthrough cross-validation because validation data is often used during\nincremental method development, which can lead to overfitting and unrealistic\nperformance expectations. Before deployment, performance is quantified using\ndifferent metrics, for which the predicted segmentation is compared to a\nreference segmentation, often obtained manually by an expert. But little is\nknown about the real performance after deployment when a reference is\nunavailable. In this paper, we introduce the concept of reverse classification\naccuracy (RCA) as a framework for predicting the performance of a segmentation\nmethod on new data. In RCA we take the predicted segmentation from a new image\nto train a reverse classifier which is evaluated on a set of reference images\nwith available ground truth. The hypothesis is that if the predicted\nsegmentation is of good quality, then the reverse classifier will perform well\non at least some of the reference images. We validate our approach on\nmulti-organ segmentation with different classifiers and segmentation methods.\nOur results indicate that it is indeed possible to predict the quality of\nindividual segmentations, in the absence of ground truth. Thus, RCA is ideal\nfor integration into automatic processing pipelines in clinical routine and as\npart of large-scale image analysis studies." + pub_date: { + seconds: 1486771200 + } + authors: "Vanya V. Valindria" + authors: "Ioannis Lavdas" + authors: "Wenjia Bai" + authors: "Konstantinos Kamnitsas" + authors: "Eric O. 
Aboagye" + authors: "Andrea G. Rockall" + authors: "Daniel Rueckert" + authors: "Ben Glocker" + } + video: { + video_id: "jbnjzyJDldA" + } + } +} +pr_id_to_video: { + key: 9 + value: { + pr_id: 9 + papers: { + paper_id: "distilling-the-knowledge-in-a-neural-network" + title: "Distilling the Knowledge in a Neural Network" + arxiv_id: "1503.02531" + abstract: "A very simple way to improve the performance of almost any machine learning\nalgorithm is to train many different models on the same data and then to\naverage their predictions. Unfortunately, making predictions using a whole\nensemble of models is cumbersome and may be too computationally expensive to\nallow deployment to a large number of users, especially if the individual\nmodels are large neural nets. Caruana and his collaborators have shown that it\nis possible to compress the knowledge in an ensemble into a single model which\nis much easier to deploy and we develop this approach further using a different\ncompression technique. We achieve some surprising results on MNIST and we show\nthat we can significantly improve the acoustic model of a heavily used\ncommercial system by distilling the knowledge in an ensemble of models into a\nsingle model. We also introduce a new type of ensemble composed of one or more\nfull models and many specialist models which learn to distinguish fine-grained\nclasses that the full models confuse. Unlike a mixture of experts, these\nspecialist models can be trained rapidly and in parallel." + pub_date: { + seconds: 1425859200 + } + authors: "Geoffrey Hinton" + authors: "Oriol Vinyals" + authors: "Jeff Dean" + repositories: { + url: "https://github.com/JunzWu/Distilling-the-Knowledge-in-a-Neural-Network" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + } + repositories: { + url: "https://github.com/jaychoi12/LG_KD" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "LG paper review QnA session - Knowledge Distillation" + } + repositories: { + url: "https://github.com/yoshitomo-matsubara/torchdistill" + framework: FRAMEWORK_PYTORCH + number_of_stars: 311 + description: "PyTorch-based modular, configuration-driven framework for knowledge distillation. 🏆18 methods presented at CVPR, ICLR, ECCV, NeurIPS, ICCV, etc are implemented so far. 🎁 Trained models, training logs and configurations are available for ensuring the reproducibiliy." + } + repositories: { + url: "https://github.com/franknb/Text-Summarization" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "An experimental repo for testing effective text summarization tools." 
+ } + repositories: { + url: "https://github.com/TakieddineSOUALHI/Transfer_learning" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/millenialSpirou/ift6010" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/MasLiang/Learning-without-Forgetting-using-Pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 + description: "This is the Pytorch implementation of LwF" + } + repositories: { + url: "https://github.com/KaiyuYue/mgd" + framework: FRAMEWORK_PYTORCH + number_of_stars: 37 + description: "Matching Guided Distillation (ECCV 2020)" + } + repositories: { + url: "https://github.com/see--/speech_recognition" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 19 + description: "This repo contains my part of the code for our winning entry in the TensorFlow Speech Recognition Challenge hosted by kaggle" + } + repositories: { + url: "https://github.com/jpmcd/TensorFlow-KnowledgeDistillation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "Knowledge Distillation with CIFAR10" + } + } + video: { + video_id: "tOItokBZSfU" + video_title: "PR-009: Distilling the Knowledge in a Neural Network (Slide: English, Speaking: Korean)" + number_of_likes: 43 + number_of_views: 6456 + published_date: { + seconds: 1495514577 + } + uploader: "Choung young jae" + } + } +} +pr_id_to_video: { + key: 10 + value: { + pr_id: 10 + papers: { + paper_id: "auto-encoding-variational-bayes" + title: "Auto-Encoding Variational Bayes" + arxiv_id: "1312.6114" + abstract: "How can we perform efficient inference and learning in directed probabilistic\nmodels, in the presence of continuous latent variables with intractable\nposterior distributions, and large datasets? We introduce a stochastic\nvariational inference and learning algorithm that scales to large datasets and,\nunder some mild differentiability conditions, even works in the intractable\ncase. Our contributions is two-fold. First, we show that a reparameterization\nof the variational lower bound yields a lower bound estimator that can be\nstraightforwardly optimized using standard stochastic gradient methods. Second,\nwe show that for i.i.d. datasets with continuous latent variables per\ndatapoint, posterior inference can be made especially efficient by fitting an\napproximate inference model (also called a recognition model) to the\nintractable posterior using the proposed lower bound estimator. Theoretical\nadvantages are reflected in experimental results." 
+ pub_date: { + seconds: 1387497600 + } + authors: "Diederik P Kingma" + authors: "Max Welling" + repositories: { + url: "https://github.com/ngiann/ApproximateVI.jl" + framework: FRAMEWORK_OTHERS + description: "Approximate variational inference in Julia" + } + repositories: { + url: "https://github.com/nghorbani/human_body_prior" + framework: FRAMEWORK_PYTORCH + number_of_stars: 291 + description: "VPoser: Variational Human Pose Prior" + } + repositories: { + url: "https://github.com/lanzhang128/disentanglement" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + } + repositories: { + url: "https://github.com/carbonati/variational-zoo" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 15 + description: "Variational inference and disentangled representations through unsupervised learning" + } + repositories: { + url: "https://github.com/tonystevenj/vae-celeba-pytorch-lightning" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Valinna VAE implemented in pytorch-lightning, trained through Celeba dataset" + } + repositories: { + url: "https://github.com/leokster/CVAE" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/selimseker/logogram-language-generator" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + } + repositories: { + url: "https://github.com/shinshoji01/Style-Restricted_GAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "This repository is to introduce our model, Style-Restricted GAN." + } + repositories: { + url: "https://github.com/EugenHotaj/pytorch-generative/blob/master/pytorch_generative/models/vae/vae.py" + framework: FRAMEWORK_PYTORCH + number_of_stars: 144 + description: "Easy generative modeling in PyTorch." + } + repositories: { + url: "https://github.com/chandu-97/BayesByBackprop" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "My implementation of Bayes by Backprop(MLP)" + } + methods: { + name: "VAE" + full_name: "Variational Autoencoder" + description: "A **Variational Autoencoder** is a type of likelihood-based generative model. It consists of an encoder, that takes in data $x$ as input and transforms this into a latent representation $z$, and a decoder, that takes a latent representation $z$ and returns a reconstruction $\\hat{x}$. Inference is performed via variational inference to approximate the posterior of the model." + } + methods: { + name: "Stochastic Gradient Variational Bayes" + full_name: "Stochastic Gradient Variational Bayes" + } + } + video: { + video_id: "KYA-GEhObIs" + video_title: "PR-010: Auto-Encoding Variational Bayes, ICLR 2014" + number_of_likes: 203 + number_of_views: 12147 + published_date: { + seconds: 1495549847 + } + uploader: "차준범" + } + } +} +pr_id_to_video: { + key: 11 + value: { + pr_id: 11 + papers: { + paper_id: "spatial-transformer-networks" + title: "Spatial Transformer Networks" + arxiv_id: "1506.02025" + abstract: "Convolutional Neural Networks define an exceptionally powerful class of\nmodels, but are still limited by the lack of ability to be spatially invariant\nto the input data in a computationally and parameter efficient manner. In this\nwork we introduce a new learnable module, the Spatial Transformer, which\nexplicitly allows the spatial manipulation of data within the network. 
This\ndifferentiable module can be inserted into existing convolutional\narchitectures, giving neural networks the ability to actively spatially\ntransform feature maps, conditional on the feature map itself, without any\nextra training supervision or modification to the optimisation process. We show\nthat the use of spatial transformers results in models which learn invariance\nto translation, scale, rotation and more generic warping, resulting in\nstate-of-the-art performance on several benchmarks, and for a number of classes\nof transformations." + pub_date: { + seconds: 1433462400 + } + authors: "Max Jaderberg" + authors: "Karen Simonyan" + authors: "Andrew Zisserman" + authors: "Koray Kavukcuoglu" + repositories: { + url: "https://github.com/dabane-ghassan/int-lab-book" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "\"Foveated Spatial Transformers\", benchmarking Spatial Transformer Networks against a bio-inspired artificial vision model." + } + repositories: { + url: "https://github.com/vinod377/STN-OCR" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Implementation of \"STN-OCR: A single Neural Network for Text Detection and Text Recognition\" in natural Scenes by Christian Bartz." + } + repositories: { + url: "https://github.com/sayakpaul/Spatial-Transformer-Networks-with-Keras" + framework: FRAMEWORK_OTHERS + number_of_stars: 15 + description: "This repository provides a Colab Notebook that shows how to use Spatial Transformer Networks inside CNNs build in Keras." + } + repositories: { + url: "https://github.com/TencentYoutuResearch/SelfSupervisedLearning-DSM" + framework: FRAMEWORK_PYTORCH + number_of_stars: 22 + description: "code for AAAI21 paper \"Enhancing Unsupervised Video Representation Learning by Decoupling the Scene and the Motion“" + } + repositories: { + url: "https://github.com/dedhiaparth98/spatial-transformer-network" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 + description: "Spatial Transformer Network (STN) provides attention to a particular region to in an image, by doing transformation to the input image. The code in this repository does Affine transformation to image, but other transformation can be explored." + } + repositories: { + url: "https://github.com/chenwuperth/rgz_rcnn" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 36 + description: "ClaRAN: A deep learning classifier for radio morphologies" + } + repositories: { + url: "https://github.com/FingerRec/DSM" + framework: FRAMEWORK_OTHERS + number_of_stars: 43 + description: "[AAAI2021] The source code for our paper 《Enhancing Unsupervised Video Representation Learning by Decoupling the Scene and the Motion》." + } + repositories: { + url: "https://github.com/tianyu-tristan/Visual-Attention-Model" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 41 + } + repositories: { + url: "https://github.com/elisiojsj/Kuzushiji-49" + framework: FRAMEWORK_PYTORCH + description: "Classifier for Kuzushiji (Japanese calligraphy) characters." + } + repositories: { + url: "https://github.com/Mugilvanan/stnbhwd" + framework: FRAMEWORK_OTHERS + } + methods: { + name: "Spatial Transformer" + full_name: "Spatial Transformer" + description: "A **Spatial Transformer** is an image model block that explicitly allows the spatial manipulation of data within a convolutional neural network. 
It gives CNNs the ability to actively spatially transform feature maps, conditional on the feature map itself, without any extra training supervision or modification to the optimisation process. Unlike pooling layers, where the receptive fields are fixed and local, the spatial transformer module is a dynamic mechanism that can actively spatially transform an image (or a feature map) by producing an appropriate transformation for each input sample. The transformation is then performed on the entire feature map (non-locally) and can include scaling, cropping, rotations, as well as non-rigid deformations.\r\n\r\nThe architecture is shown in the Figure to the right. The input feature map $U$ is passed to a localisation network which regresses the transformation parameters $\\theta$. The regular spatial grid $G$ over $V$ is transformed to the sampling grid $T\\_{\\theta}\\left(G\\right)$, which is applied to $U$, producing the warped output feature map $V$. The combination of the localisation network and sampling mechanism defines a spatial transformer." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "SGD" + full_name: "Stochastic Gradient Descent" + description: "**Stochastic Gradient Descent** is an iterative optimization technique that uses minibatches of data to form an expectation of the gradient, rather than the full gradient using all available data. That is for weights $w$ and a loss function $L$ we have:\r\n\r\n$$ w\\_{t+1} = w\\_{t} - \\eta\\hat{\\nabla}\\_{w}{L(w\\_{t})} $$\r\n\r\nWhere $\\eta$ is a learning rate. SGD reduces redundancy compared to batch gradient descent - which recomputes gradients for similar examples before each parameter update - so it is usually much faster.\r\n\r\n(Image Source: [here](http://rasbt.github.io/mlxtend/user_guide/general_concepts/gradient-optimization/))" + } + } + video: { + video_id: "Rv3osRZWGbg" + video_title: "PR-011: Spatial Transformer Networks" + number_of_likes: 45 + number_of_views: 5443 + published_date: { + seconds: 1495978512 + } + uploader: "Jiyang Kang" + } + } +} +pr_id_to_video: { + key: 12 + value: { + pr_id: 12 + papers: { + paper_id: "faster-r-cnn-towards-real-time-object" + title: "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" + arxiv_id: "1506.01497" + abstract: "State-of-the-art object detection networks depend on region proposal\nalgorithms to hypothesize object locations. Advances like SPPnet and Fast R-CNN\nhave reduced the running time of these detection networks, exposing region\nproposal computation as a bottleneck. In this work, we introduce a Region\nProposal Network (RPN) that shares full-image convolutional features with the\ndetection network, thus enabling nearly cost-free region proposals. An RPN is a\nfully convolutional network that simultaneously predicts object bounds and\nobjectness scores at each position. 
The RPN is trained end-to-end to generate\nhigh-quality region proposals, which are used by Fast R-CNN for detection. We\nfurther merge RPN and Fast R-CNN into a single network by sharing their\nconvolutional features---using the recently popular terminology of neural\nnetworks with 'attention' mechanisms, the RPN component tells the unified\nnetwork where to look. For the very deep VGG-16 model, our detection system has\na frame rate of 5fps (including all steps) on a GPU, while achieving\nstate-of-the-art object detection accuracy on PASCAL VOC 2007, 2012, and MS\nCOCO datasets with only 300 proposals per image. In ILSVRC and COCO 2015\ncompetitions, Faster R-CNN and RPN are the foundations of the 1st-place winning\nentries in several tracks. Code has been made publicly available." + pub_date: { + seconds: 1433376000 + } + authors: "Shaoqing Ren" + authors: "Kaiming He" + authors: "Ross Girshick" + authors: "Jian Sun" + repositories: { + url: "https://github.com/miaohua1982/simple_fasterrcnn_pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/JeffCHEN2017/WSSTG" + framework: FRAMEWORK_PYTORCH + number_of_stars: 40 + description: "This repository contains the main baselines introduced in WSSTG (ACL 2019)." + } + repositories: { + url: "https://github.com/VDIGPKU/OPANAS" + framework: FRAMEWORK_PYTORCH + number_of_stars: 14 + description: "The official code for OPANAS: One-Shot Path Aggregation Network Architecture Search for Object Detection" + } + repositories: { + url: "https://github.com/KostadinovShalon/UAVDetectionTrackingBenchmark" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + } + repositories: { + url: "https://github.com/potterhsu/easy-faster-rcnn.pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 147 + description: "An easy implementation of Faster R-CNN (https://arxiv.org/pdf/1506.01497.pdf) in PyTorch." + } + repositories: { + url: "https://github.com/zhudelong/elevator_button_recognition" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 16 + description: "Button recognition for autonomous elevator operation" + } + repositories: { + url: "https://github.com/EmGarr/kerod" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 34 + description: "DETR - Faster RCNN implementation in tensorflow 2" + } + repositories: { + url: "https://github.com/liangheming/faster_rcnnv1" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + description: "pytorch implement of fasterRCNN,736px(max side),39.4mAP(COCO),30.21fps(RTX 2080TI)" + } + repositories: { + url: "https://github.com/chenwuperth/ClaRAN" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "ClaRAN - Classifying Radio Galaxies Automatically with Neural Networks" + } + repositories: { + url: "https://github.com/AlphaJia/pytorch-faster-rcnn" + framework: FRAMEWORK_PYTORCH + number_of_stars: 292 + description: "pytorch based implementation faster rcnn" + } + methods: { + name: "RPN" + full_name: "Region Proposal Network" + description: "A **Region Proposal Network**, or **RPN**, is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals. 
RPN and algorithms like Fast R-CNN can be merged into a single network by sharing their convolutional features - using the recently popular terminology of neural networks with attention mechanisms, the RPN component tells the unified network where to look.\r\n\r\nRPNs are designed to efficiently predict region proposals with a wide range of scales and aspect ratios. RPNs use anchor boxes that serve as references at multiple scales and aspect ratios. The scheme can be thought of as a pyramid of regression references, which avoids enumerating images or filters of multiple scales or aspect ratios." + } + methods: { + name: "Fast R-CNN" + full_name: "Fast R-CNN" + description: "**Fast R-CNN** is an object detection model that improves in its predecessor [R-CNN](https://paperswithcode.com/method/r-cnn) in a number of ways. Instead of extracting CNN features independently for each region of interest, Fast R-CNN aggregates them into a single forward pass over the image; i.e. regions of interest from the same image share computation and memory in the forward and backward passes." + } + methods: { + name: "RoIPool" + full_name: "RoIPool" + description: "**Region of Interest Pooling**, or **RoIPool**, is an operation for extracting a small feature map (e.g., $7×7$) from each RoI in detection and segmentation based tasks. Features are extracted from each candidate box, and thereafter in models like Fast R-CNN, are then classified and bounding box regression performed.\r\n\r\nThe actual scaling to, e.g., $7×7$, occurs by dividing the region proposal into equally sized sections, finding the largest value in each section, and then copying these max values to the output buffer. In essence, **RoIPool** is max pooling on a discrete grid based on a box.\r\n\r\nImage Source: [Joyce Xu](https://towardsdatascience.com/deep-learning-for-object-detection-a-comprehensive-review-73930816d8d9)" + } + methods: { + name: "VGG-16" + full_name: "VGG-16" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Faster R-CNN" + full_name: "Faster R-CNN" + description: "**Faster R-CNN** is an object detection model that improves on [Fast R-CNN](https://paperswithcode.com/method/fast-r-cnn) by utilising a region proposal network ([RPN](https://paperswithcode.com/method/rpn)) with the CNN model. The RPN shares full-image convolutional features with the detection network, enabling nearly cost-free region proposals. It is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. 
The RPN is trained end-to-end to generate high-quality region proposals, which are used by [Fast R-CNN](https://paperswithcode.com/method/fast-r-cnn) for detection. RPN and Fast R-CNN are merged into a single network by sharing their convolutional features: the RPN component tells the unified network where to look.\r\n\r\nAs a whole, Faster R-CNN consists of two modules. The first module is a deep fully convolutional network that proposes regions, and the second module is the Fast R-CNN detector that uses the proposed regions." + } + } + video: { + video_id: "kcPAGIgBGRs" + video_title: "PR-012: Faster R-CNN : Towards Real-Time Object Detection with Region Proposal Networks" + number_of_likes: 387 + number_of_views: 48675 + published_date: { + seconds: 1495981094 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 13 + value: { + pr_id: 13 + papers: { + paper_id: "domain-adversarial-training-of-neural" + title: "Domain-Adversarial Training of Neural Networks" + arxiv_id: "1505.07818" + abstract: "We introduce a new representation learning approach for domain adaptation, in\nwhich data at training and test time come from similar but different\ndistributions. Our approach is directly inspired by the theory on domain\nadaptation suggesting that, for effective domain transfer to be achieved,\npredictions must be made based on features that cannot discriminate between the\ntraining (source) and test (target) domains. The approach implements this idea\nin the context of neural network architectures that are trained on labeled data\nfrom the source domain and unlabeled data from the target domain (no labeled\ntarget-domain data is necessary). As the training progresses, the approach\npromotes the emergence of features that are (i) discriminative for the main\nlearning task on the source domain and (ii) indiscriminate with respect to the\nshift between the domains. We show that this adaptation behaviour can be\nachieved in almost any feed-forward model by augmenting it with few standard\nlayers and a new gradient reversal layer. The resulting augmented architecture\ncan be trained using standard backpropagation and stochastic gradient descent,\nand can thus be implemented with little effort using any of the deep learning\npackages. We demonstrate the success of our approach for two distinct\nclassification problems (document sentiment analysis and image classification),\nwhere state-of-the-art domain adaptation performance on standard benchmarks is\nachieved. We also validate the approach for descriptor learning task in the\ncontext of person re-identification application." + pub_date: { + seconds: 1432771200 + } + authors: "Yaroslav Ganin" + authors: "Evgeniya Ustinova" + authors: "Hana Ajakan" + authors: "Pascal Germain" + authors: "Hugo Larochelle" + authors: "François Laviolette" + authors: "Mario Marchand" + authors: "Victor Lempitsky" + repositories: { + url: "https://github.com/criteo-research/pytorch-ada" + framework: FRAMEWORK_PYTORCH + number_of_stars: 53 + description: "Another Domain Adaptation library, aimed at researchers." 
+ } + repositories: { + url: "https://github.com/rpryzant/proxy-a-distance" + framework: FRAMEWORK_OTHERS + number_of_stars: 29 + description: "Proxy A-Distance algorithm for measuring domain disparity in parallel corpora" + } + repositories: { + url: "https://github.com/JorisRoels/domain-adaptive-segmentation" + framework: FRAMEWORK_PYTORCH + number_of_stars: 15 + description: "Domain adaptation segmentation for volume EM imaging" + } + repositories: { + url: "https://github.com/facebookresearch/DomainBed" + framework: FRAMEWORK_PYTORCH + number_of_stars: 339 + description: "DomainBed is a suite to test domain generalization algorithms" + } + repositories: { + url: "https://github.com/monkey0head/Domain_Adaptation_thesis" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Source code for master thesis on Unsupervised Domain Adaptation for Image Processing " + } + repositories: { + url: "https://github.com/dv-fenix/Domain-Adaptation" + framework: FRAMEWORK_PYTORCH + description: "PyTorch implementations of some papers on Domain Adaptation" + } + repositories: { + url: "https://github.com/Nadavc220/DomainAdversarialTrainingOfNeuralNetworks" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "This is a Pytorch implementation of the 2014 paper named Domain Adversarial Training of Neural Networks " + } + repositories: { + url: "https://github.com/asahi417/DeepDomainAdaptation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 18 + description: "Tensorflow deep learning based domain adaptation model implementations with experiment of estimate MNIST by SVHN data (SVHN -> MNIST): DANN (domain-adversarial neural network), Deep JDOT (joint distribution optimal transportation)" + } + repositories: { + url: "https://github.com/ShichengChen/WaveNetSeparateAudio" + framework: FRAMEWORK_PYTORCH + number_of_stars: 44 + description: "WaveNet for the separation of audio sources" + } + repositories: { + url: "https://github.com/scpark20/universal-music-translation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 26 + description: "A Universal Music Translation Network Implementation" + } + } + video: { + video_id: "n2J7giHrS-Y" + video_title: "PR-013: Domain Adversarial Training of Neural Network" + number_of_likes: 51 + number_of_views: 5880 + published_date: { + seconds: 1496675287 + } + uploader: "Jaejun Yoo" + } + } +} +pr_id_to_video: { + key: 14 + value: { + pr_id: 14 + papers: { + paper_id: "on-human-motion-prediction-using-recurrent" + title: "On human motion prediction using recurrent neural networks" + arxiv_id: "1705.02445" + abstract: "Human motion modelling is a classical problem at the intersection of graphics\nand computer vision, with applications spanning human-computer interaction,\nmotion synthesis, and motion prediction for virtual and augmented reality.\nFollowing the success of deep learning methods in several computer vision\ntasks, recent work has focused on using deep recurrent neural networks (RNNs)\nto model human motion, with the goal of learning time-dependent representations\nthat perform tasks such as short-term motion prediction and long-term human\nmotion synthesis. We examine recent work, with a focus on the evaluation\nmethodologies commonly used in the literature, and show that, surprisingly,\nstate-of-the-art performance can be achieved by a simple baseline that does not\nattempt to model motion at all. 
We investigate this result, and analyze recent\nRNN methods by looking at the architectures, loss functions, and training\nprocedures used in state-of-the-art approaches. We propose three changes to the\nstandard RNN models typically used for human motion, which result in a simple\nand scalable RNN architecture that obtains state-of-the-art performance on\nhuman motion prediction." + pub_date: { + seconds: 1494028800 + } + authors: "Julieta Martinez" + authors: "Michael J. Black" + authors: "Javier Romero" + repositories: { + url: "https://github.com/nageshpindi/human-motion-prediction-master" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/YQRickWang/tf" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/facebookresearch/QuaterNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 602 + description: "Proposes neural networks that can generate animation of virtual characters for different actions." + } + repositories: { + is_official: true + url: "https://github.com/una-dinosauria/human-motion-prediction" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 302 + description: "Simple baselines and RNNs for predicting human motion in tensorflow. Presented at CVPR 17." + } + repositories: { + url: "https://github.com/garroud/human-motion-prediction-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 12 + description: "Pytorch implementation of human motion prediction" + } + repositories: { + url: "https://github.com/YQRickWang/Motion_Prediction" + framework: FRAMEWORK_PYTORCH + description: "Motion Prediciton on deepfly" + } + } + video: { + video_id: "Y1loN3Sc4Dk" + video_title: "PR-014: On Human Motion Prediction using RNNs (2017)" + number_of_likes: 53 + number_of_views: 5113 + published_date: { + seconds: 1496611967 + } + uploader: "Terry TaeWoong Um" + } + } +} +pr_id_to_video: { + key: 15 + value: { + pr_id: 15 + papers: { + paper_id: "convolutional-neural-networks-for-sentence" + title: "Convolutional Neural Networks for Sentence Classification" + arxiv_id: "1408.5882" + abstract: "We report on a series of experiments with convolutional neural networks (CNN)\ntrained on top of pre-trained word vectors for sentence-level classification\ntasks. We show that a simple CNN with little hyperparameter tuning and static\nvectors achieves excellent results on multiple benchmarks. Learning\ntask-specific vectors through fine-tuning offers further gains in performance.\nWe additionally propose a simple modification to the architecture to allow for\nthe use of both task-specific and static vectors. The CNN models discussed\nherein improve upon the state of the art on 4 out of 7 tasks, which include\nsentiment analysis and question classification." 
+ pub_date: { + seconds: 1408924800 + } + authors: "Yoon Kim" + repositories: { + url: "https://github.com/chiemenz/AzureML-Sentiment-Classification-and-Model-Deployment" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/GayeonKim-data/section4-project" + framework: FRAMEWORK_OTHERS + description: "딥러닝을 활용한 영화 리뷰 속 스포일러 탐지 프로젝트" + } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleRec/tree/release/2.1.0/models/contentunderstanding/textcnn" + framework: FRAMEWORK_OTHERS + number_of_stars: 530 + description: "大规模推荐模型训练工具" + } + repositories: { + url: "https://github.com/guanliu321/CNN-RNN-HAN-for-Text-Classification-Using-NLP" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "It’s a NLP Problem,the goal of our project is to classify categories of news based on the content of news articles from the BBC website using CNN, RNN and HAN models on two datasets that the former dataset have 2225 news, 5 categories and the latter dataset have 18846 news, 20 categories. Set hyperparameters, such as embedding dimensions of glove model, trainable parameter of embedding layer, bidirectional LSTM or simple LSTM Preprocess the news articles, including removing punctuation ,stopwords, lemmatization,removing outliers in terms of news length and the number of sentences and set the corresponding parameters Tokenize the data using word-index which is fit on the train data,then generate 2D input data (article, word) for CNN and RNN algorithms,and then generate 3D input data (article, sentence, word) for HAN algorithm Use set hyperparameters to build the model architecture and use checkpointing, early stopping to train model, and then compare the test accuracy and validation loss of these three models Utilized:Python,Pandas,Numpy,Seaborn,Matplolib,NLP,DNN,CNN,RNN,HAN,LSTM,GPU,Text Classification,Hyperparameters Tuning" + } + repositories: { + url: "https://github.com/dongjun-Lee/text-classification-models-tf" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 486 + description: "Tensorflow implementations of Text Classification Models." 
+ } + repositories: { + url: "https://github.com/yinghao1019/NLP_and_DL_practice/blob/master/Convolution_Neural_Netowrks_for_sentence_classification_Practice.ipynb" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "This repository is used for NLP Model practice and learning" + } + repositories: { + url: "https://github.com/yinghao1019/NLP_and_DL_practice" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "This repository is used for NLP Model practice and learning" + } + repositories: { + url: "https://github.com/chiemenz/automl_vs_hyperdrive" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/yschoi-nisp/AI-Grand-Challenge-2020" + framework: FRAMEWORK_PYTORCH + number_of_stars: 24 + description: "AI grand challenge 2020 Repo (Speech Recognition Track)" + } + repositories: { + url: "https://github.com/prakashpandey9/Text-Classification-Pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 711 + description: "Text classification using deep learning models in Pytorch" + } + } + video: { + video_id: "IRB2vXSet2E" + video_title: "PR-015:Convolutional Neural Networks for Sentence Classification" + number_of_likes: 49 + number_of_views: 5783 + published_date: { + seconds: 1497187460 + } + uploader: "keun bong Kwak" + } + } +} +pr_id_to_video: { + key: 16 + value: { + pr_id: 16 + papers: { + paper_id: "you-only-look-once-unified-real-time-object" + title: "You Only Look Once: Unified, Real-Time Object Detection" + arxiv_id: "1506.02640" + abstract: "We present YOLO, a new approach to object detection. Prior work on object\ndetection repurposes classifiers to perform detection. Instead, we frame object\ndetection as a regression problem to spatially separated bounding boxes and\nassociated class probabilities. A single neural network predicts bounding boxes\nand class probabilities directly from full images in one evaluation. Since the\nwhole detection pipeline is a single network, it can be optimized end-to-end\ndirectly on detection performance.\n Our unified architecture is extremely fast. Our base YOLO model processes\nimages in real-time at 45 frames per second. A smaller version of the network,\nFast YOLO, processes an astounding 155 frames per second while still achieving\ndouble the mAP of other real-time detectors. Compared to state-of-the-art\ndetection systems, YOLO makes more localization errors but is far less likely\nto predict false detections where nothing exists. Finally, YOLO learns very\ngeneral representations of objects. It outperforms all other detection methods,\nincluding DPM and R-CNN, by a wide margin when generalizing from natural images\nto artwork on both the Picasso Dataset and the People-Art Dataset." 
+ pub_date: { + seconds: 1433721600 + } + authors: "Joseph Redmon" + authors: "Santosh Divvala" + authors: "Ross Girshick" + authors: "Ali Farhadi" + repositories: { + url: "https://github.com/DevBruce/YOLOv1-TF2" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "YOLOv1 implementation with TensorFlow2" + } + repositories: { + url: "https://github.com/msuhail1997/YOLO-Pytorch-Object_Detection" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/jalotra/Queue-Detection" + framework: FRAMEWORK_PYTORCH + description: "A naive Algorithm that uses People Detection and Convex Hull as subroutines to solve this problem: \"Given an image of people standing in a queue, how many people are standing in some queue{Q}.\"" + } + repositories: { + url: "https://github.com/jalotra/Queue-Detection-" + framework: FRAMEWORK_PYTORCH + description: "A naive Algorithm that uses People Detection and Convex Hull as subroutines to solve this problem: \"Given an image of people standing in a queue, how many people are standing in some queue{Q}.\"" + } + repositories: { + url: "https://github.com/TeamML-2021/knowledge-base" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/object_detection/you_only_look_once" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "ai codebase" + } + repositories: { + url: "https://github.com/hamidriasat/Computer-Vision-and-Deep-Learning" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/ritesh2448/Text-Detection-And-Recognition" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/MINED30/Face_Mask_Detection_YOLO" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/Qengineering/YoloV3-ncnn-Raspberry-Pi-4" + framework: FRAMEWORK_OTHERS + number_of_stars: 21 + description: "MobileNetV2_YOLOV3 for ncnn framework" + } + methods: { + name: "Non Maximum Suppression" + full_name: "Non Maximum Suppression" + description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criteria is usually discarding entities that are below a given probability bound. With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where a $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." 
+ } + methods: { + name: "Step Decay" + full_name: "Step Decay" + description: "**Step Decay** is a learning rate schedule that drops the learning rate by a factor every few epochs, where the number of epochs is a hyperparameter.\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" + } + methods: { + name: "Random Resized Crop" + full_name: "Random Resized Crop" + description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + methods: { + name: "SGD with Momentum" + full_name: "SGD with Momentum" + description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" + } + } + video: { + video_id: "eTDcoeqj1_w" + video_title: "PR-016: You only look once: Unified, real-time object detection" + number_of_likes: 99 + number_of_views: 16126 + published_date: { + seconds: 1497795435 + } + uploader: "Taegyun Jeon" + } + } +} +pr_id_to_video: { + key: 17 + value: { + pr_id: 17 + papers: { + paper_id: "neural-architecture-search-with-reinforcement" + title: "Neural Architecture Search with Reinforcement Learning" + arxiv_id: "1611.01578" + abstract: "Neural networks are powerful and flexible models that work well for many\ndifficult learning tasks in image, speech and natural language understanding.\nDespite their success, neural networks are still hard to design. In this paper,\nwe use a recurrent network to generate the model descriptions of neural\nnetworks and train this RNN with reinforcement learning to maximize the\nexpected accuracy of the generated architectures on a validation set. On the\nCIFAR-10 dataset, our method, starting from scratch, can design a novel network\narchitecture that rivals the best human-invented architecture in terms of test\nset accuracy. Our CIFAR-10 model achieves a test error rate of 3.65, which is\n0.09 percent better and 1.05x faster than the previous state-of-the-art model\nthat used a similar architectural scheme. On the Penn Treebank dataset, our\nmodel can compose a novel recurrent cell that outperforms the widely-used LSTM\ncell, and other state-of-the-art baselines. Our cell achieves a test set\nperplexity of 62.4 on the Penn Treebank, which is 3.6 perplexity better than\nthe previous state-of-the-art model. The cell can also be transferred to the\ncharacter language modeling task on PTB and achieves a state-of-the-art\nperplexity of 1.214." + pub_date: { + seconds: 1478304000 + } + authors: "Barret Zoph" + authors: "Quoc V. 
Le" + repositories: { + url: "https://github.com/abcp4/DAPytorch" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/YaCpotato/deepaugmentFix" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/TreeLimes/QANAS" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/cshannonn/blackscholes_nas" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "Can a neural network learn Black Scholes, yes..." + } + repositories: { + url: "https://github.com/YaCpotato/B4ResearchDeepaugment" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/GiuliaLanzillotta/INAS" + framework: FRAMEWORK_PYTORCH + description: "Infinite Neural Architecture Search" + } + repositories: { + url: "https://github.com/carpedm20/ENAS-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2446 + description: "PyTorch implementation of \"Efficient Neural Architecture Search via Parameters Sharing\"" + } + repositories: { + is_official: true + url: "https://github.com/tensorflow/models" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70339 + description: "Models and examples built with TensorFlow" + } + repositories: { + url: "https://github.com/barisozmen/deepaugment" + framework: FRAMEWORK_OTHERS + number_of_stars: 192 + description: "Discover augmentation strategies tailored for your dataset" + } + repositories: { + url: "https://github.com/DataCanvasIO/Hypernets" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 94 + description: "A General Automated Machine Learning framework to simplify the development of End-to-end AutoML toolkits in specific domains." + } + } + video: { + video_id: "XP3vyVrrt3Q" + video_title: "PR-017: Neural Architecture Search with Reinforcement Learning" + number_of_likes: 31 + number_of_views: 3951 + published_date: { + seconds: 1497796191 + } + uploader: "Suh Kiho" + } + } +} +pr_id_to_video: { + key: 18 + value: { + pr_id: 18 + papers: { + paper_id: "a-simple-neural-network-module-for-relational" + title: "A simple neural network module for relational reasoning" + arxiv_id: "1706.01427" + abstract: "Relational reasoning is a central component of generally intelligent\nbehavior, but has proven difficult for neural networks to learn. In this paper\nwe describe how to use Relation Networks (RNs) as a simple plug-and-play module\nto solve problems that fundamentally hinge on relational reasoning. We tested\nRN-augmented networks on three tasks: visual question answering using a\nchallenging dataset called CLEVR, on which we achieve state-of-the-art,\nsuper-human performance; text-based question answering using the bAbI suite of\ntasks; and complex reasoning about dynamic physical systems. Then, using a\ncurated dataset called Sort-of-CLEVR we show that powerful convolutional\nnetworks do not have a general capacity to solve relational questions, but can\ngain this capacity when augmented with RNs. Our work shows how a deep learning\narchitecture equipped with an RN module can implicitly discover and learn to\nreason about entities and their relations." + pub_date: { + seconds: 1496620800 + } + authors: "Adam Santoro" + authors: "David Raposo" + authors: "David G. T. 
Barrett" + authors: "Mateusz Malinowski" + authors: "Razvan Pascanu" + authors: "Peter Battaglia" + authors: "Timothy Lillicrap" + repositories: { + url: "https://github.com/jaehyunnn/RelationalNetwork_pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "An un-official implementation of Relational Network [A. Santoro et al., 2017] (PyTorch) " + } + repositories: { + url: "https://github.com/ttok0s7u2n5/ML2_proj" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/moduIo/Relation-Networks" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "Keras implementation of Relation Networks for Visual Question Answering using the CLEVR dataset." + } + repositories: { + url: "https://github.com/matwilso/relation-networks" + framework: FRAMEWORK_TENSORFLOW + description: "Messing around with Relation Networks and other stuff for state embedding" + } + repositories: { + url: "https://github.com/adriangoe/relational-networks-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "A pytorch implementation of Relational Networks by Santoro et al (https://arxiv.org/abs/1706.01427)" + } + repositories: { + url: "https://github.com/mesnico/RelationNetworks-CLEVR" + framework: FRAMEWORK_PYTORCH + number_of_stars: 74 + description: "A pytorch implementation for \"A simple neural network module for relational reasoning\", working on the CLEVR dataset" + } + repositories: { + url: "https://github.com/fcorencoret/dynamic-rn" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/IllgamhoDuck/ResTR" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Aesthetic quality assessment Artificial Intelligence based on relation between elements / 요소 간 관계를 기반으로 미적 수준을 판별하는 인공지능 / 2018.11.21 기준 AVA dataset에서 State of the Art result" + } + repositories: { + url: "https://github.com/gitlimlab/Relation-Network-Tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 326 + description: "Tensorflow implementations of Relational Networks and a VQA dataset named Sort-of-CLEVR proposed by DeepMind." + } + repositories: { + url: "https://github.com/mdda/relationships-from-entity-stream" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: " Research presented at the NIPs 2017 ViGiL Workshop." + } + } + video: { + video_id: "Lb1PVpFp9F8" + video_title: "PR-018: A Simple Neural Network Module for Relational Reasoning (DeepMind)" + number_of_likes: 63 + number_of_views: 6769 + published_date: { + seconds: 1498432650 + } + uploader: "Sung Kim" + } + } +} +pr_id_to_video: { + key: 19 + value: { + pr_id: 19 + papers: { + paper_id: "continuous-control-with-deep-reinforcement" + title: "Continuous control with deep reinforcement learning" + arxiv_id: "1509.02971" + abstract: "We adapt the ideas underlying the success of Deep Q-Learning to the continuous action domain. We present an actor-critic, model-free algorithm based on the deterministic policy gradient that can operate over continuous action spaces. Using the same learning algorithm, network architecture and hyper-parameters, our algorithm robustly solves more than 20 simulated physics tasks, including classic problems such as cartpole swing-up, dexterous manipulation, legged locomotion and car driving. 
Our algorithm is able to find policies whose performance is competitive with those found by a planning algorithm with full access to the dynamics of the domain and its derivatives. We further demonstrate that for many of the tasks the algorithm can learn policies end-to-end: directly from raw pixel inputs." + pub_date: { + seconds: 1441756800 + } + authors: "Timothy P. Lillicrap" + authors: "Jonathan J. Hunt" + authors: "Alexander Pritzel" + authors: "Nicolas Heess" + authors: "Tom Erez" + authors: "Yuval Tassa" + authors: "David Silver" + authors: "Daan Wierstra" + repositories: { + url: "https://github.com/Brook1711/RIS_components" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "components of RIS simulations" + } + repositories: { + url: "https://github.com/rikluost/RL_DQN_Pong" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Tackling Atari 2600 game Pong with Reinforcement Learning by utilizing DQN and TF-Agents" + } + repositories: { + url: "https://github.com/Medabid1/RL_Project" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "My Deep learning project : Training a robot in MuJoCo with RL" + } + repositories: { + url: "https://github.com/flavioschneider/ml_papers_presentations" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/KelvinYang0320/deepbots-panda" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "Panda with Deep Reinforcement Learning Simulation Environment Webots" + } + repositories: { + url: "https://github.com/wpiszlogin/driver_critic" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Solution for CarRacing-v0 environment from OpenAI Gym. It uses the Deep Deterministic Policy Gradient algorithm." + } + repositories: { + url: "https://github.com/backgom2357/Recommender_system_via_deep_RL" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 + description: "The implemetation of Deep Reinforcement Learning based Recommender System from the paper Deep Reinforcement Learning based Recommendation with Explicit User-Item Interactions Modeling by Liu et al." + } + repositories: { + url: "https://github.com/SarodYatawatta/smart-calibration" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "Deep reinforcement learning for smart calibration of radio telescopes. Automatic hyper-parameter tuning." + } + repositories: { + url: "https://github.com/dchetelat/acer" + framework: FRAMEWORK_PYTORCH + number_of_stars: 17 + description: "PyTorch implementation of both discrete and continuous ACER" + } + repositories: { + url: "https://github.com/DanielLSM/safe-rl-tutorial" + framework: FRAMEWORK_TENSORFLOW + description: "Just a mini tutorial on safe rl" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Q-Learning" + full_name: "Q-Learning" + description: "**Q-Learning** is an off-policy temporal difference control algorithm:\r\n\r\n$$Q\\left(S\\_{t}, A\\_{t}\\right) \\leftarrow Q\\left(S\\_{t}, A\\_{t}\\right) + \\alpha\\left[R_{t+1} + \\gamma\\max\\_{a}Q\\left(S\\_{t+1}, a\\right) - Q\\left(S\\_{t}, A\\_{t}\\right)\\right] $$\r\n\r\nThe learned action-value function $Q$ directly approximates $q\\_{*}$, the optimal action-value function, independent of the policy being followed.\r\n\r\nSource: Sutton and Barto, Reinforcement Learning, 2nd Edition" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "DDPG" + full_name: "Deep Deterministic Policy Gradient" + description: "**DDPG**, or **Deep Deterministic Policy Gradient**, is an actor-critic, model-free algorithm based on the deterministic policy gradient that can operate over continuous action spaces. It combines the actor-critic approach with insights from [DQNs](https://paperswithcode.com/method/dqn): in particular, the insights that 1) the network is trained off-policy with samples from a replay buffer to minimize correlations between samples, and 2) the network is trained with a target Q network to give consistent targets during temporal difference backups. DDPG makes use of the same ideas along with batch normalization." + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). 
The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Experience Replay" + full_name: "Experience Replay" + description: "**Experience Replay** is a replay memory technique used in reinforcement learning where we store the agent’s experiences at each time-step, $e\\_{t} = \\left(s\\_{t}, a\\_{t}, r\\_{t}, s\\_{t+1}\\right)$ in a data-set $D = e\\_{1}, \\cdots, e\\_{N}$ , pooled over many episodes into a replay memory. We then usually sample the memory randomly for a minibatch of experience, and use this to learn off-policy, as with Deep Q-Networks. 
This tackles the problem of autocorrelation leading to unstable training, by making the problem more like a supervised learning problem.\r\n\r\nImage Credit: [Hands-On Reinforcement Learning with Python, Sudharsan Ravichandiran](https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781788836524)" + } + } + video: { + video_id: "h2WSVBAC1t4" + video_title: "PR-019: Continuous Control with Deep Reinforcement Learning" + number_of_likes: 52 + number_of_views: 5333 + published_date: { + seconds: 1498452479 + } + uploader: "Seungil Kim" + } + } +} +pr_id_to_video: { + key: 20 + value: { + pr_id: 20 + papers: { + paper_id: "delving-deep-into-rectifiers-surpassing-human" + title: "Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification" + arxiv_id: "1502.01852" + abstract: "Rectified activation units (rectifiers) are essential for state-of-the-art\nneural networks. In this work, we study rectifier neural networks for image\nclassification from two aspects. First, we propose a Parametric Rectified\nLinear Unit (PReLU) that generalizes the traditional rectified unit. PReLU\nimproves model fitting with nearly zero extra computational cost and little\noverfitting risk. Second, we derive a robust initialization method that\nparticularly considers the rectifier nonlinearities. This method enables us to\ntrain extremely deep rectified models directly from scratch and to investigate\ndeeper or wider network architectures. Based on our PReLU networks\n(PReLU-nets), we achieve 4.94% top-5 test error on the ImageNet 2012\nclassification dataset. This is a 26% relative improvement over the ILSVRC 2014\nwinner (GoogLeNet, 6.66%). To our knowledge, our result is the first to surpass\nhuman-level performance (5.1%, Russakovsky et al.) on this visual recognition\nchallenge." + pub_date: { + seconds: 1423180800 + } + authors: "Kaiming He" + authors: "Xiangyu Zhang" + authors: "Shaoqing Ren" + authors: "Jian Sun" + repositories: { + url: "https://github.com/phogbinh/handwritten-digit-recognition" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/ihsuy/Train-by-Reconnect" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "Official code for the NeurIPS 2020 paper Train by Reconnect: Decoupling Locations of Weights from Their Values by Yushi Qiu and Reiji Suda." + } + repositories: { + url: "https://github.com/AnzorGozalishvili/autoencoders_playground" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Playing with several types of autoencoders with supervised, unsupervised and semi-supervised learning objectives." 
+ } + repositories: { + url: "https://github.com/hamiddimyati/dd2424-deep-learning" + framework: FRAMEWORK_OTHERS + description: "All codes and reports for assignments of deep learning course" + } + repositories: { + url: "https://github.com/krish-pinninti/api-ann-python" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/LiamLau1/MLDE" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/MrtnMndt/Rethinking_CNN_Layerwise_Feature_Amounts" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "PyTorch implementation of our paper \"Rethinking Layer-wise Feature Amounts in Convolutional Neural Network Architectures\"" + } + repositories: { + url: "https://github.com/patconrey/ANN-Example" + framework: FRAMEWORK_TENSORFLOW + description: "This is an example script to create, train, and evaluate an artificial neural network." + } + repositories: { + url: "https://github.com/dmbernaal/Daedalus" + framework: FRAMEWORK_PYTORCH + number_of_stars: 13 + description: "Deep Learning Research " + } + repositories: { + url: "https://github.com/LFhase/Research_Navigation" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "Recordings of my research navigation, including paper/book reading notes and related implementations" + } + methods: { + name: "PReLU" + full_name: "Parameterized ReLU" + description: "A **Parametric Rectified Linear Unit**, or **PReLU**, is an activation function that generalizes the traditional rectified unit with a slope for negative values. Formally:\r\n\r\n$$f\\left(y\\_{i}\\right) = y\\_{i} \\text{ if } y\\_{i} \\ge 0$$\r\n$$f\\left(y\\_{i}\\right) = a\\_{i}y\\_{i} \\text{ if } y\\_{i} \\leq 0$$\r\n\r\nThe intuition is that different layers may require different types of nonlinearity. Indeed the authors find in experiments with convolutional neural networks that PReLus for the initial layer have more positive slopes, i.e. closer to linear. Since the filters of the first layers are Gabor-like filters such as edge or texture detectors, this shows a circumstance where positive and negative responses of filters are respected. In contrast the authors find deeper layers have smaller coefficients, suggesting the model becomes more discriminative at later layers (while it wants to retain more information at earlier layers)." + } + methods: { + name: "PReLU-Net" + full_name: "PReLU-Net" + description: "**PReLU-Net** is a type of convolutional neural network that utilises parameterized ReLUs for its activation function. It also uses a robust initialization scheme - afterwards known as [Kaiming Initialization](https://paperswithcode.com/method/he-initialization) - that accounts for non-linear activation functions." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "Spatial Pyramid Pooling" + full_name: "Spatial Pyramid Pooling" + description: "** Spatial Pyramid Pooling (SPP)** is a pooling layer that removes the fixed-size constraint of the network, i.e. a CNN does not require a fixed-size input image. Specifically, we add an SPP layer on top of the last convolutional layer. The SPP layer pools the features and generates fixed-length outputs, which are then fed into the fully-connected layers (or other classifiers). In other words, we perform some information aggregation at a deeper stage of the network hierarchy (between convolutional layers and fully-connected layers) to avoid the need for cropping or warping at the beginning." + } + methods: { + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. 
Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + methods: { + name: "Random Resized Crop" + full_name: "Random Resized Crop" + description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + } + video: { + video_id: "absOinFeGv0" + video_title: "PR-020: Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification" + number_of_likes: 14 + number_of_views: 1815 + published_date: { + seconds: 1499002058 + } + uploader: "Jiyang Kang" + } + } +} +pr_id_to_video: { + key: 21 + value: { + pr_id: 21 + papers: { + paper_id: "batch-normalization-accelerating-deep-network" + title: "Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift" + arxiv_id: "1502.03167" + abstract: "Training Deep Neural Networks is complicated by the fact that the\ndistribution of each layer's inputs changes during training, as the parameters\nof the previous layers change. This slows down the training by requiring lower\nlearning rates and careful parameter initialization, and makes it notoriously\nhard to train models with saturating nonlinearities. We refer to this\nphenomenon as internal covariate shift, and address the problem by normalizing\nlayer inputs. Our method draws its strength from making normalization a part of\nthe model architecture and performing the normalization for each training\nmini-batch. Batch Normalization allows us to use much higher learning rates and\nbe less careful about initialization. It also acts as a regularizer, in some\ncases eliminating the need for Dropout. Applied to a state-of-the-art image\nclassification model, Batch Normalization achieves the same accuracy with 14\ntimes fewer training steps, and beats the original model by a significant\nmargin. Using an ensemble of batch-normalized networks, we improve upon the\nbest published result on ImageNet classification: reaching 4.9% top-5\nvalidation error (and 4.8% test error), exceeding the accuracy of human raters." 
+ pub_date: { + seconds: 1423612800 + } + authors: "Sergey Ioffe" + authors: "Christian Szegedy" + repositories: { + url: "https://github.com/tensorflow/models/tree/master/research/deeplab" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70341 + description: "Models and examples built with TensorFlow" + } + repositories: { + url: "https://github.com/simo-bat/Crack_detection" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } + repositories: { + url: "https://github.com/dodoproptit99/deep-speaker" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Speaker identification with Deep Speaker" + } + repositories: { + url: "https://github.com/tensorflow/models/tree/master/research/slim" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70341 + description: "Models and examples built with TensorFlow" + } + repositories: { + url: "https://github.com/KushajveerSingh/SPADE-PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 22 + description: "PyTorch unofficial implementation of Semantic Image Synthesis with Spatially-Adaptive Normalization paper by Nvidia Research" + } + repositories: { + url: "https://github.com/sayakpaul/Adaptive-Gradient-Clipping" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 58 + description: "Minimal implementation of adaptive gradient clipping (https://arxiv.org/abs/2102.06171) in TensorFlow 2. " + } + repositories: { + url: "https://github.com/osmr/imgclsmob" + framework: FRAMEWORK_OTHERS + number_of_stars: 2202 + description: "Sandbox for training deep learning networks" + } + repositories: { + url: "https://github.com/lab-ml/nn/tree/master/labml_nn/normalization/batch_norm" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3069 + description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + } + repositories: { + url: "https://github.com/CPJKU/cca_layer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 31 + description: "Implementation of Canonical Correlation Analysis Layer for Cross-Modality Retrieval." + } + repositories: { + url: "https://github.com/tensorflow/models/tree/master/research/seq_flow_lite" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70340 + description: "Models and examples built with TensorFlow" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Exponential Decay" + full_name: "Exponential Decay" + description: "**Exponential Decay** is a learning rate schedule where we decay the learning rate with more iterations using an exponential function:\r\n\r\n$$ \\text{lr} = \\text{lr}\\_{0}\\exp\\left(-kt\\right) $$\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. 
It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Random Resized Crop" + full_name: "Random Resized Crop" + description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. 
This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + } + video: { + video_id: "TDx8iZHwFtM" + video_title: "PR-021: Batch Normalization (language: korean)" + number_of_likes: 103 + number_of_views: 8036 + published_date: { + seconds: 1499004604 + } + uploader: "Choung young jae" + } + } +} +pr_id_to_video: { + key: 22 + value: { + pr_id: 22 + papers: { + paper_id: "infogan-interpretable-representation-learning" + title: "InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets" + arxiv_id: "1606.03657" + abstract: "This paper describes InfoGAN, an information-theoretic extension to the\nGenerative Adversarial Network that is able to learn disentangled\nrepresentations in a completely unsupervised manner. InfoGAN is a generative\nadversarial network that also maximizes the mutual information between a small\nsubset of the latent variables and the observation. We derive a lower bound to\nthe mutual information objective that can be optimized efficiently, and show\nthat our training procedure can be interpreted as a variation of the Wake-Sleep\nalgorithm. Specifically, InfoGAN successfully disentangles writing styles from\ndigit shapes on the MNIST dataset, pose from lighting of 3D rendered images,\nand background digits from the central digit on the SVHN dataset. It also\ndiscovers visual concepts that include hair styles, presence/absence of\neyeglasses, and emotions on the CelebA face dataset. Experiments show that\nInfoGAN learns interpretable representations that are competitive with\nrepresentations learned by existing fully supervised methods." + pub_date: { + seconds: 1465689600 + } + authors: "Xi Chen" + authors: "Yan Duan" + authors: "Rein Houthooft" + authors: "John Schulman" + authors: "Ilya Sutskever" + authors: "Pieter Abbeel" + repositories: { + url: "https://github.com/yashgarg98/GAN" + framework: FRAMEWORK_OTHERS + description: "Some implementations of Generative Adversarial Networks.(DCGAN, InfoGAN)" + } + repositories: { + url: "https://github.com/chandragupta0001/GAN/tree/master/info_gan" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/elingaard/infogan-mnist" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "PyTorch implementation of InfoGAN" + } + repositories: { + url: "https://github.com/amiryanj/socialways" + framework: FRAMEWORK_PYTORCH + number_of_stars: 87 + description: "Social Ways: Learning Multi-Modal Distributions of Pedestrian Trajectories with GANs (CVPR 2019)" + } + repositories: { + url: "https://github.com/Neptune-Trojans/GANs" + framework: FRAMEWORK_TENSORFLOW + description: "Implementation of different GANs architectures" + } + repositories: { + url: "https://github.com/zcemycl/Matlab-GAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 78 + description: "MATLAB implementations of Generative Adversarial Networks -- from GAN to Pixel2Pixel, CycleGAN" + } + repositories: { + url: "https://github.com/Evavanrooijen/InfoGAN-PyTorch" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/inkplatform/InfoGAN-PyTorch" + framework: FRAMEWORK_PYTORCH + description: "code for InfoGAN" + } + repositories: { + url: "https://github.com/vinoth654321/Casia-Webface" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: 
"https://github.com/bacdavid/InfomaxVAE" + framework: FRAMEWORK_OTHERS + description: "Obtain the latent variables that contain the maximal mutual information." + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Tanh Activation" + full_name: "Tanh Activation" + description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" + } + methods: { + name: "Feedforward Network" + full_name: "Feedforward Network" + description: "A **Feedforward Network**, or a **Multilayer Perceptron (MLP)**, is a neural network with solely densely connected layers. This is the classic neural network architecture of the literature. It consists of inputs $x$ passed through units $h$ (of which there can be many layers) to predict a target $y$. Activation functions are generally chosen to be non-linear to allow for flexible functional approximation.\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "InfoGAN" + full_name: "InfoGAN" + description: "**InfoGAN** is a type of generative adversarial network that modifies the GAN objective to\r\nencourage it to learn interpretable and meaningful representations. 
This is done by maximizing the\r\nmutual information between a fixed small subset of the GAN’s noise variables and the observations.\r\n\r\nFormally, InfoGAN is defined as a minimax game with a variational regularization of mutual information and the hyperparameter $\\lambda$:\r\n\r\n$$ \\min\\_{G, Q}\\max\\_{D}V\\_{INFOGAN}\\left(D, G, Q\\right) = V\\left(D, G\\right) - \\lambda{L}\\_{I}\\left(G, Q\\right) $$\r\n\r\nWhere $Q$ is an auxiliary distribution that approximates the posterior $P\\left(c\\mid{x}\\right)$ - the probability of the latent code $c$ given the data $x$ - and $L\\_{I}$ is the variational lower bound of the mutual information between the latent code and the observations.\r\n\r\nIn the practical implementation, there is another fully-connected layer to output parameters for the conditional distribution $Q$ (negligible computation on top of regular GAN structures). Q is represented with a softmax non-linearity for a categorical latent code. For a continuous latent code, the authors assume a factored Gaussian." + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + methods: { + name: "Leaky ReLU" + full_name: "Leaky ReLU" + description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we may suffer from sparse gradients, for example training generative adversarial networks." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "GAN" + full_name: "Generative Adversarial Network" + description: "A **GAN**, or **Generative Adversarial Network**, is a generative model that simultaneously trains\r\ntwo models: a generative model $G$ that captures the data distribution, and a discriminative model $D$ that estimates the\r\nprobability that a sample came from the training data rather than $G$.\r\n\r\nThe training procedure for $G$ is to maximize the probability of $D$ making\r\na mistake. This framework corresponds to a minimax two-player game. In the\r\nspace of arbitrary functions $G$ and $D$, a unique solution exists, with $G$\r\nrecovering the training data distribution and $D$ equal to $\\frac{1}{2}$\r\neverywhere. In the case where $G$ and $D$ are defined by multilayer perceptrons,\r\nthe entire system can be trained with backpropagation. 
\r\n\r\n(Image Source: [here](http://www.kdnuggets.com/2017/01/generative-adversarial-networks-hot-topic-machine-learning.html))" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + } + video: { + video_id: "_4jbgniqt_Q" + video_title: "PR-022: InfoGAN (OpenAI)" + number_of_likes: 42 + number_of_views: 5907 + published_date: { + seconds: 1499608297 + } + uploader: "차준범" + } + } +} +pr_id_to_video: { + key: 23 + value: { + pr_id: 23 + papers: { + paper_id: "yolo9000-better-faster-stronger" + title: "YOLO9000: Better, Faster, Stronger" + arxiv_id: "1612.08242" + abstract: "We introduce YOLO9000, a state-of-the-art, real-time object detection system\nthat can detect over 9000 object categories. First we propose various\nimprovements to the YOLO detection method, both novel and drawn from prior\nwork. The improved model, YOLOv2, is state-of-the-art on standard detection\ntasks like PASCAL VOC and COCO. At 67 FPS, YOLOv2 gets 76.8 mAP on VOC 2007. At\n40 FPS, YOLOv2 gets 78.6 mAP, outperforming state-of-the-art methods like\nFaster RCNN with ResNet and SSD while still running significantly faster.\nFinally we propose a method to jointly train on object detection and\nclassification. Using this method we train YOLO9000 simultaneously on the COCO\ndetection dataset and the ImageNet classification dataset. Our joint training\nallows YOLO9000 to predict detections for object classes that don't have\nlabelled detection data. We validate our approach on the ImageNet detection\ntask. YOLO9000 gets 19.7 mAP on the ImageNet detection validation set despite\nonly having detection data for 44 of the 200 classes. On the 156 classes not in\nCOCO, YOLO9000 gets 16.0 mAP. But YOLO can detect more than just 200 classes;\nit predicts detections for more than 9000 different object categories. And it\nstill runs in real-time." 
+ pub_date: { + seconds: 1482624000 + } + authors: "Joseph Redmon" + authors: "Ali Farhadi" + repositories: { + url: "https://github.com/Qengineering/YoloV2-ncnn-Jetson-Nano" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + } + repositories: { + url: "https://github.com/Qengineering/YoloV2-ncnn-Raspberry-Pi-4" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "YoloV2 for bare Raspberry Pi 4" + } + repositories: { + url: "https://github.com/benjamintli/darknet-gun-detector" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/DavianYang/yolo.ai" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Yolo Implementation (v1, v2, v3)" + } + repositories: { + url: "https://github.com/preste-nakam/AI_whiteboard" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 + description: "The system which helps to transform any wall or flat surface into an interactive whiteboard just with an ordinary RGB camera and a hand! " + } + repositories: { + url: "https://github.com/preste-ai/camera_ai_whiteboard" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "Transform any wall to an intelligent whiteboard" + } + repositories: { + url: "https://gitlab.com/eavise/lightnet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 56 + description: "Darknet got illuminated by PyTorch ~ Meet Lightnet" + } + repositories: { + url: "https://github.com/drscotthawley/SPNet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Object detection for ESPI images of oscillating steelpan drums" + } + repositories: { + url: "https://github.com/Vijayabhaskar96/Object-Detection-Algorithms" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "My Implementations of Popular Object detection algorithms in Pytorch." + } + repositories: { + url: "https://github.com/Maskify/darknet" + framework: FRAMEWORK_TENSORFLOW + } + methods: { + name: "SSD" + full_name: "SSD" + description: "**SSD** is a single-stage object detection method that discretizes the output space of bounding boxes into a set of default boxes over different aspect ratios and scales per feature map location. At prediction time, the network generates scores for the presence of each object category in each default box and produces adjustments to the box to better match the object shape. Additionally, the network combines predictions from multiple feature maps with different resolutions to naturally handle objects of various sizes. \r\n\r\nThe fundamental improvement in speed comes from eliminating bounding box proposals and the subsequent pixel or feature resampling stage. Improvements over competing single-stage methods include using a small convolutional filter to predict object categories and offsets in bounding box locations, using separate predictors (filters) for different aspect ratio detections, and applying these filters to multiple feature maps from the later stages of a network in order to perform detection at multiple scales." + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. 
We minimize a loss function comprising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through the objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "Fast-YOLOv2" + full_name: "Fast-YOLOv2" + } + methods: { + name: "Random Resized Crop" + full_name: "Random Resized Crop" + description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + methods: { + name: "Darknet-19" + full_name: "Darknet-19" + description: "**Darknet-19** is a convolutional neural network that is used as the backbone of [YOLOv2](https://paperswithcode.com/method/yolov2). Similar to the [VGG](https://paperswithcode.com/method/vgg) models it mostly uses $3 \\times 3$ filters and doubles the number of channels after every pooling step. Following the work on Network in Network (NIN) it uses global average pooling to make predictions as well as $1 \\times 1$ filters to compress the feature representation between $3 \\times 3$ convolutions. Batch Normalization is used to stabilize training, speed up convergence, and regularize the model." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Non Maximum Suppression" + full_name: "Non Maximum Suppression" + description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criterion is usually discarding entities that are below a given probability bound. 
With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where an $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" + } + methods: { + name: "ColorJitter" + full_name: "Color Jitter" + description: "**ColorJitter** is a type of image data augmentation where we randomly change the brightness, contrast and saturation of an image.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + methods: { + name: "SGD with Momentum" + full_name: "SGD with Momentum" + description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as a ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" + } + methods: { + name: "Polynomial Rate Decay" + full_name: "Polynomial Rate Decay" + description: "**Polynomial Rate Decay** is a learning rate schedule where we polynomially decay the learning rate." + } + } + video: { + video_id: "6fdclSGgeio" + video_title: "PR-023: YOLO9000: Better, Faster, Stronger" + number_of_likes: 95 + number_of_views: 12517 + published_date: { + seconds: 1500299473 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 24 + value: { + pr_id: 24 + papers: { + paper_id: "pixel-recurrent-neural-networks" + title: "Pixel Recurrent Neural Networks" + arxiv_id: "1601.06759" + abstract: "Modeling the distribution of natural images is a landmark problem in\nunsupervised learning. This task requires an image model that is at once\nexpressive, tractable and scalable. We present a deep neural network that\nsequentially predicts the pixels in an image along the two spatial dimensions.\nOur method models the discrete probability of the raw pixel values and encodes\nthe complete set of dependencies in the image. Architectural novelties include\nfast two-dimensional recurrent layers and an effective use of residual\nconnections in deep recurrent networks. We achieve log-likelihood scores on\nnatural images that are considerably better than the previous state of the art.\nOur main results also provide benchmarks on the diverse ImageNet dataset.\nSamples generated from the model appear crisp, varied and globally coherent." + pub_date: { + seconds: 1453680000 + } + authors: "Aaron van den Oord" + authors: "Nal Kalchbrenner" + authors: "Koray Kavukcuoglu" + repositories: { + url: "https://github.com/EugenHotaj/pytorch-generative/blob/master/pytorch_generative/models/autoregressive/pixel_cnn.py" + framework: FRAMEWORK_PYTORCH + number_of_stars: 144 + description: "Easy generative modeling in PyTorch." 
+ } + repositories: { + url: "https://github.com/kamenbliznashki/pixel_models" + framework: FRAMEWORK_PYTORCH + number_of_stars: 18 + description: "Pytorch implementations of autoregressive pixel models - PixelCNN, PixelCNN++, PixelSNAIL" + } + repositories: { + url: "https://github.com/eyalbetzalel/pytorch-generative-v6" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/eyalbetzalel/pytorch-generative-v2" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/davidemartinelli/PixelCNN" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/doiodl/pixelcnn-rnn" + framework: FRAMEWORK_TENSORFLOW + description: "Implementation of the PixelCNN and PixelRNN generative networks following the official paper: https://arxiv.org/pdf/1601.06759.pdf . Tech stack: python, tensorflow and keras. All the code was written in google colab with tf 2.0" + } + repositories: { + url: "https://github.com/eyalbetzalel/pytorch-generative" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/tccnchsu/Artifical_Intelegent" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/vocong25/gated_pixelcnn" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/arcelien/hawc-deep-learning" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Reproducing physics simulations on HAWC data with deep learning" + } + methods: { + name: "Masked Convolution" + full_name: "Masked Convolution" + description: "A **Masked Convolution** is a type of convolution which masks certain pixels so that the model can only predict based on pixels already seen. This type of convolution was introduced with PixelRNN generative models, where an image is generated pixel by pixel, to ensure that the model was conditional only on pixels already visited." + } + methods: { + name: "Tanh Activation" + full_name: "Tanh Activation" + description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" + } + methods: { + name: "LSTM" + full_name: "Long Short-Term Memory" + description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. 
Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + methods: { + name: "PixelRNN" + full_name: "Pixel Recurrent Neural Network" + description: "**PixelRNNs** are generative neural networks that sequentially predicts the pixels in an image along the two spatial dimensions. They model the discrete probability of the raw pixel values and encode the complete set of dependencies in the image. Variants include the Row LSTM and the Diagonal BiLSTM, that scale more easily to larger datasets. Pixel values are treated as discrete random variables by using a softmax layer in the conditional distributions. Masked convolutions are employed to allow PixelRNNs to model full dependencies between the color channels." + } + } + video: { + video_id: "BvcwEz4VPIQ" + video_title: "PR-024: Pixel Recurrent Neural Network" + number_of_likes: 49 + number_of_views: 5537 + published_date: { + seconds: 1502156580 + } + uploader: "Jaejun Yoo" + } + } +} +pr_id_to_video: { + key: 25 + value: { + pr_id: 25 + papers: { + paper_id: "online-sensor-hallucination-via-knowledge" + title: "Online Sensor Hallucination via Knowledge Distillation for Multimodal Image Classification" + arxiv_id: "1908.10559" + abstract: "We deal with the problem of information fusion driven satellite image/scene classification and propose a generic hallucination architecture considering that all the available sensor information are present during training while some of the image modalities may be absent while testing. It is well-known that different sensors are capable of capturing complementary information for a given geographical area and a classification module incorporating information from all the sources are expected to produce an improved performance as compared to considering only a subset of the modalities. However, the classical classifier systems inherently require all the features used to train the module to be present for the test instances as well, which may not always be possible for typical remote sensing applications (say, disaster management). As a remedy, we provide a robust solution in terms of a hallucination module that can approximate the missing modalities from the available ones during the decision-making stage. In order to ensure better knowledge transfer during modality hallucination, we explicitly incorporate concepts of knowledge distillation for the purpose of exploring the privileged (side) information in our framework and subsequently introduce an intuitive modular training approach. 
The proposed network is evaluated extensively on a large-scale corpus of PAN-MS image pairs (scene recognition) as well as on a benchmark hyperspectral image dataset (image classification) where we follow different experimental scenarios and find that the proposed hallucination based module indeed is capable of capturing the multi-source information, albeit the explicit absence of some of the sensor information, and aid in improved scene characterization." + pub_date: { + seconds: 1566950400 + } + authors: "Saurabh Kumar" + authors: "Biplab Banerjee" + authors: "Subhasis Chaudhuri" + } + video: { + video_id: "KdRo7ATNs9g" + video_title: "PR-025: Learning with side information through modality hallucination (2016)" + number_of_likes: 18 + number_of_views: 1823 + published_date: { + seconds: 1500818803 + } + uploader: "Terry TaeWoong Um" + } + } +} +pr_id_to_video: { + key: 26 + value: { + pr_id: 26 + papers: { + paper_id: "u-net-convolutional-networks-for-biomedical" + title: "U-Net: Convolutional Networks for Biomedical Image Segmentation" + arxiv_id: "1505.04597" + abstract: "There is large consent that successful training of deep networks requires\nmany thousand annotated training samples. In this paper, we present a network\nand training strategy that relies on the strong use of data augmentation to use\nthe available annotated samples more efficiently. The architecture consists of\na contracting path to capture context and a symmetric expanding path that\nenables precise localization. We show that such a network can be trained\nend-to-end from very few images and outperforms the prior best method (a\nsliding-window convolutional network) on the ISBI challenge for segmentation of\nneuronal structures in electron microscopic stacks. Using the same network\ntrained on transmitted light microscopy images (phase contrast and DIC) we won\nthe ISBI cell tracking challenge 2015 in these categories by a large margin.\nMoreover, the network is fast. Segmentation of a 512x512 image takes less than\na second on a recent GPU. The full implementation (based on Caffe) and the\ntrained networks are available at\nhttp://lmb.informatik.uni-freiburg.de/people/ronneber/u-net ." + pub_date: { + seconds: 1431907200 + } + authors: "Olaf Ronneberger" + authors: "Philipp Fischer" + authors: "Thomas Brox" + repositories: { + url: "https://github.com/salem-devloper/COVID-Lung-Segment" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/mateuszbuda/brain-segmentation-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 409 + description: "U-Net implementation in PyTorch for FLAIR abnormality segmentation in brain MRI" + } + repositories: { + url: "https://github.com/taha7ussein007/Papers_Implementation/tree/main/Paper_Implementation_From_Scratch/UNet_FromScratch_Pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "In this repo i'm going to practice implementing research, academic and business papers." + } + repositories: { + url: "https://github.com/ayushdabra/dubai-satellite-imagery-segmentation" + framework: FRAMEWORK_TENSORFLOW + description: "Multi-Class Semantic Segmentation on Dubai's Satellite Images." + } + repositories: { + url: "https://github.com/SahinTiryaki/Brain-tumor-segmentation-Vgg19UNet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Brain tumor segmentation was performed using the Tensorflow Keras api." 
+ } + repositories: { + url: "https://github.com/sagnik1511/U-Net-Reduced-with-keras" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "Complete U-net Implementation with keras" + } + repositories: { + url: "https://github.com/osmr/imgclsmob" + framework: FRAMEWORK_OTHERS + number_of_stars: 2202 + description: "Sandbox for training deep learning networks" + } + repositories: { + url: "https://github.com/creeper121386/vielab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/zongyue-lu/pytorch-unet-family" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + } + repositories: { + url: "https://github.com/Aryavir07/Detecting-Brain-Tumor-Using-Deep-Learning" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Detecting Brain Tumor using Res-U-Net architecture. This would reduce the cost of cancer diagnosis and help in the early diagnosis of tumors which would essentially be a life saver." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "U-Net" + full_name: "U-Net" + description: "**U-Net** is an architecture for semantic segmentation. It consists of a contracting path and an expansive path. The contracting path follows the typical architecture of a convolutional network. It consists of the repeated application of two 3x3 convolutions (unpadded convolutions), each followed by a rectified linear unit (ReLU) and a 2x2 max pooling operation with stride 2 for downsampling. At each downsampling step we double the number of feature channels. 
Every step in the expansive path consists of an upsampling of the feature map followed by a 2x2 convolution (“up-convolution”) that halves the number of feature channels, a concatenation with the correspondingly cropped feature map from the contracting path, and two 3x3 convolutions, each followed by a ReLU. The cropping is necessary due to the loss of border pixels in every convolution. At the final layer a 1x1 convolution is used to map each 64-component feature vector to the desired number of classes. In total the network has 23 convolutional layers." + } + methods: { + name: "Concatenated Skip Connection" + full_name: "Concatenated Skip Connection" + description: "A **Concatenated Skip Connection** is a type of skip connection that seeks to reuse features by concatenating them to new layers, allowing more information to be retained from previous layers of the network. This contrasts with say, residual connections, where element-wise summation is used instead to incorporate information from previous layers. This type of skip connection is prominently used in DenseNets (and also Inception networks), which the Figure to the right illustrates." + } + } + video: { + video_id: "ZdPBkPGfRSk" + video_title: "PR-026: Notes for CVPR Machine Learning Session" + number_of_likes: 9 + number_of_views: 1502 + published_date: { + seconds: 1501469470 + } + uploader: "Taegyun Jeon" + } + } +} +pr_id_to_video: { + key: 27 + value: { + pr_id: 27 + papers: { + paper_id: "linking-glove-with-word2vec" + title: "Linking GloVe with word2vec" + arxiv_id: "1411.5595" + abstract: "The Global Vectors for word representation (GloVe), introduced by Jeffrey\nPennington et al. is reported to be an efficient and effective method for\nlearning vector representations of words. State-of-the-art performance is also\nprovided by skip-gram with negative-sampling (SGNS) implemented in the word2vec\ntool. In this note, we explain the similarities between the training objectives\nof the two models, and show that the objective of SGNS is similar to the\nobjective of a specialized form of GloVe, though their cost functions are\ndefined differently." + pub_date: { + seconds: 1416441600 + } + authors: "Tianze Shi" + authors: "Zhiyuan Liu" + methods: { + name: "GloVe" + full_name: "GloVe Embeddings" + description: "**GloVe Embeddings** are a type of word embedding that encode the co-occurrence probability ratio between two words as vector differences. GloVe uses a weighted least squares objective $J$ that minimizes the difference between the dot product of the vectors of two words and the logarithm of their number of co-occurrences:\r\n\r\n$$ J=\\sum\\_{i, j=1}^{V}f\\left(𝑋\\_{i j}\\right)(w^{T}\\_{i}\\tilde{w}_{j} + b\\_{i} + \\tilde{b}\\_{j} - \\log{𝑋}\\_{ij})^{2} $$\r\n\r\nwhere $w\\_{i}$ and $b\\_{i}$ are the word vector and bias respectively of word $i$, $\\tilde{w}_{j}$ and $b\\_{j}$ are the context word vector and bias respectively of word $k$, $X\\_{ij}$ is the number of times word $i$ occurs in the context of word $j$, and $f$ is a weighting function that assigns lower weights to rare and frequent co-occurrences." 
+ } + } + video: { + video_id: "uZ2GtEe-50E" + video_title: "PR-027:GloVe - Global vectors for word representation" + number_of_likes: 65 + number_of_views: 4316 + published_date: { + seconds: 1502026123 + } + uploader: "keun bong Kwak" + } + } +} +pr_id_to_video: { + key: 28 + value: { + pr_id: 28 + papers: { + paper_id: "densely-connected-convolutional-networks" + title: "Densely Connected Convolutional Networks" + arxiv_id: "1608.06993" + abstract: "Recent work has shown that convolutional networks can be substantially\ndeeper, more accurate, and efficient to train if they contain shorter\nconnections between layers close to the input and those close to the output. In\nthis paper, we embrace this observation and introduce the Dense Convolutional\nNetwork (DenseNet), which connects each layer to every other layer in a\nfeed-forward fashion. Whereas traditional convolutional networks with L layers\nhave L connections - one between each layer and its subsequent layer - our\nnetwork has L(L+1)/2 direct connections. For each layer, the feature-maps of\nall preceding layers are used as inputs, and its own feature-maps are used as\ninputs into all subsequent layers. DenseNets have several compelling\nadvantages: they alleviate the vanishing-gradient problem, strengthen feature\npropagation, encourage feature reuse, and substantially reduce the number of\nparameters. We evaluate our proposed architecture on four highly competitive\nobject recognition benchmark tasks (CIFAR-10, CIFAR-100, SVHN, and ImageNet).\nDenseNets obtain significant improvements over the state-of-the-art on most of\nthem, whilst requiring less computation to achieve high performance. Code and\npre-trained models are available at https://github.com/liuzhuang13/DenseNet ." + pub_date: { + seconds: 1472083200 + } + authors: "Gao Huang" + authors: "Zhuang Liu" + authors: "Laurens van der Maaten" + authors: "Kilian Q. Weinberger" + repositories: { + url: "https://github.com/Duplums/bhb10k-dl-benchmark" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "A Reproducible Benchmark for CNN Models on the BHB-10K Dataset" + } + repositories: { + url: "https://github.com/priyavrat-misra/xrays-and-gradcam" + framework: FRAMEWORK_PYTORCH + number_of_stars: 14 + description: "Classification and Gradient-based Localization of Chest Radiographs using PyTorch." + } + repositories: { + url: "https://github.com/cmasch/densenet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 38 + description: "Implementation of Densely Connected Convolutional Network with Keras and TensorFlow." + } + repositories: { + url: "https://github.com/lpirola13/flower-recognizer" + framework: FRAMEWORK_TENSORFLOW + description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." 
+ } + repositories: { + url: "https://github.com/bozliu/E2E-Keyword-Spotting" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Wake-Up Keyword Detection With End To End Deep Neural Networks" + } + repositories: { + url: "https://github.com/pytorch/vision" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9309 + description: "Datasets, Transforms and Models specific to Computer Vision" + } + repositories: { + url: "https://github.com/lpirola13/flower_recognizer" + framework: FRAMEWORK_TENSORFLOW + description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." + } + repositories: { + url: "https://github.com/osmr/imgclsmob" + framework: FRAMEWORK_OTHERS + number_of_stars: 2202 + description: "Sandbox for training deep learning networks" + } + repositories: { + url: "https://github.com/niranjana98/Image-Classification" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleClas" + framework: FRAMEWORK_OTHERS + number_of_stars: 1547 + description: "A treasure chest for visual recognition powered by PaddlePaddle" + } + methods: { + name: "Nesterov Accelerated Gradient" + full_name: "Nesterov Accelerated Gradient" + description: "**Nesterov Accelerated Gradient** is a momentum-based SGD optimizer that \"looks ahead\" to where the parameters will be to calculate the gradient **ex post** rather than **ex ante**:\r\n\r\n$$ v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta-\\gamma{v\\_{t-1}}\\right) $$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} + v\\_{t}$$\r\n\r\nLike SGD with momentum $\\gamma$ is usually set to $0.9$.\r\n\r\nThe intuition is that the [standard momentum](https://paperswithcode.com/method/sgd-with-momentum) method first computes the gradient at the current location and then takes a big jump in the direction of the updated accumulated gradient. In contrast Nesterov momentum first makes a big jump in the direction of the previous accumulated gradient and then measures the gradient where it ends up and makes a correction. The idea being that it is better to correct a mistake after you have made it. \r\n\r\nImage Source: [Geoff Hinton lecture notes](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)" + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Concatenated Skip Connection" + full_name: "Concatenated Skip Connection" + description: "A **Concatenated Skip Connection** is a type of skip connection that seeks to reuse features by concatenating them to new layers, allowing more information to be retained from previous layers of the network. This contrasts with say, residual connections, where element-wise summation is used instead to incorporate information from previous layers. This type of skip connection is prominently used in DenseNets (and also Inception networks), which the Figure to the right illustrates." + } + methods: { + name: "DenseNet" + full_name: "DenseNet" + description: "A **DenseNet** is a type of convolutional neural network that utilises dense connections between layers, through [Dense Blocks](http://www.paperswithcode.com/method/dense-block), where we connect *all layers* (with matching feature-map sizes) directly with each other. To preserve the feed-forward nature, each layer obtains additional inputs from all preceding layers and passes on its own feature-maps to all subsequent layers." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function comprising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through the objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + } + video: { + video_id: "fe2Vn0mwALI" + video_title: "PR-028: Densely Connected Convolutional Networks (CVPR 2017, Best Paper Award) by Gao Huang et al." + number_of_likes: 124 + number_of_views: 12477 + published_date: { + seconds: 1502159004 + } + uploader: "Sung Kim" + } + } +} +pr_id_to_video: { + key: 29 + value: { + pr_id: 29 + papers: { + paper_id: "apprenticeship-learning-using-inverse" + title: "Apprenticeship Learning using Inverse Reinforcement Learning and Gradient Methods" + arxiv_id: "1206.5264" + abstract: "In this paper we propose a novel gradient algorithm to learn a policy from an expert's observed behavior assuming that the expert behaves optimally with respect to some unknown reward function of a Markovian Decision Problem. The algorithm's aim is to find a reward function such that the resulting optimal policy matches well the expert's observed behavior. The main difficulty is that the mapping from the parameters to policies is both nonsmooth and highly redundant. Resorting to subdifferentials solves the first difficulty, while the second one is overcome by computing natural gradients. We tested the proposed method in two artificial domains and found it to be more reliable and efficient than some previous methods." 
+ pub_date: { + seconds: 1340150400 + } + authors: "Gergely Neu" + authors: "Csaba Szepesvari" + } + video: { + video_id: "AXi4s3aFN6M" + video_title: "PR-029: Apprenticeship Learning via Inverse Reinforcement Learning" + number_of_likes: 17 + number_of_views: 2086 + published_date: { + seconds: 1505165154 + } + uploader: "Suh Kiho" + } + } +} +pr_id_to_video: { + key: 30 + value: { + pr_id: 30 + papers: { + paper_id: "photo-realistic-single-image-super-resolution" + title: "Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network" + arxiv_id: "1609.04802" + abstract: "Despite the breakthroughs in accuracy and speed of single image\nsuper-resolution using faster and deeper convolutional neural networks, one\ncentral problem remains largely unsolved: how do we recover the finer texture\ndetails when we super-resolve at large upscaling factors? The behavior of\noptimization-based super-resolution methods is principally driven by the choice\nof the objective function. Recent work has largely focused on minimizing the\nmean squared reconstruction error. The resulting estimates have high peak\nsignal-to-noise ratios, but they are often lacking high-frequency details and\nare perceptually unsatisfying in the sense that they fail to match the fidelity\nexpected at the higher resolution. In this paper, we present SRGAN, a\ngenerative adversarial network (GAN) for image super-resolution (SR). To our\nknowledge, it is the first framework capable of inferring photo-realistic\nnatural images for 4x upscaling factors. To achieve this, we propose a\nperceptual loss function which consists of an adversarial loss and a content\nloss. The adversarial loss pushes our solution to the natural image manifold\nusing a discriminator network that is trained to differentiate between the\nsuper-resolved images and original photo-realistic images. In addition, we use\na content loss motivated by perceptual similarity instead of similarity in\npixel space. Our deep residual network is able to recover photo-realistic\ntextures from heavily downsampled images on public benchmarks. An extensive\nmean-opinion-score (MOS) test shows hugely significant gains in perceptual\nquality using SRGAN. The MOS scores obtained with SRGAN are closer to those of\nthe original high-resolution images than to those obtained with any\nstate-of-the-art method." + pub_date: { + seconds: 1473897600 + } + authors: "Christian Ledig" + authors: "Lucas Theis" + authors: "Ferenc Huszar" + authors: "Jose Caballero" + authors: "Andrew Cunningham" + authors: "Alejandro Acosta" + authors: "Andrew Aitken" + authors: "Alykhan Tejani" + authors: "Johannes Totz" + authors: "Zehan Wang" + authors: "Wenzhe Shi" + repositories: { + url: "https://github.com/chaoxu0512/Pushbroom-satellite-image-SRGAN" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/omkarghugarkar007/Neural_Super_Sampling" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "An attempt to upsample images by a factor of 4 using GAN" + } + repositories: { + url: "https://github.com/AntonioAlgaida/Edge.SRGAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "A solution of SISR that merges the ideas of SRGAN and Edge Informed SISR. This solution was presented on 1st SpainAI hackathon obtain 4th position." 
+ } + repositories: { + url: "https://github.com/Idelcads/Super_Resolution_overview" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/Idelcads/IMKI_Technical_test" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/akanametov/SuperResolution" + framework: FRAMEWORK_PYTORCH + description: "A SuperResolution GAN trained on STL10 dataset" + } + repositories: { + url: "https://github.com/akanametov/Pix2Pix" + framework: FRAMEWORK_PYTORCH + description: "A Pix2Pix GAN trained on Facades dataset" + } + repositories: { + url: "https://github.com/TanyaChutani/Image-Super-Resolution-SRGAN-TF2.0" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "A Tensorflow2.0 implementation of Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network" + } + repositories: { + url: "https://github.com/BearNinja123/esrGAN_vBearNinja123" + framework: FRAMEWORK_TENSORFLOW + description: "My implementation of the srGAN and esrGAN models." + } + repositories: { + url: "https://github.com/wkhademi/ImageEnhancement" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 37 + description: "Various models for handling underexposure, overexposure, super-resolution, shadow removal, etc." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "VGG" + full_name: "VGG" + description: "**VGG** is a classical convolutional neural network architecture. It was based on an analysis of how to increase the depth of such networks. The network utilises small 3 x 3 filters. Otherwise the network is characterized by its simplicity: the only other components being pooling layers and a fully connected layer.\r\n\r\nImage: [Davi Frossard](https://www.cs.toronto.edu/frossard/post/vgg16/)" + } + methods: { + name: "SRGAN Residual Block" + full_name: "SRGAN Residual Block" + description: "**SRGAN Residual Block** is a residual block used in the [SRGAN](https://paperswithcode.com/method/srgan#) generator for image super-resolution. It is similar to standard [residual blocks](https://paperswithcode.com/method/residual-block), although it uses a [PReLU](https://paperswithcode.com/method/prelu) activation function to help training (preventing sparse gradients during GAN training)." 
+ } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "VGG Loss" + full_name: "VGG Loss" + description: "**VGG Loss** is a type of content loss intorduced in the [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://paperswithcode.com/paper/perceptual-losses-for-real-time-style) super-resolution and style transfer framework. It is an alternative to pixel-wise losses; VGG Loss attempts to be closer to perceptual similarity. The VGG loss is based on the ReLU activation layers of the pre-trained 19 layer VGG network. With $\\phi\\_{i,j}$ we indicate the feature map obtained by the $j$-th convolution (after activation) before the $i$-th maxpooling layer within the VGG19 network, which we consider given. We then define the VGG loss as the euclidean distance between the feature representations of a reconstructed image $G\\_{\\theta\\_{G}}\\left(I^{LR}\\right)$ and the reference image $I^{HR}$:\r\n\r\n$$ l\\_{VGG/i.j} = \\frac{1}{W\\_{i,j}H\\_{i,j}}\\sum\\_{x=1}^{W\\_{i,j}}\\sum\\_{y=1}^{H\\_{i,j}}\\left(\\phi\\_{i,j}\\left(I^{HR}\\right)\\_{x, y} - \\phi\\_{i,j}\\left(G\\_{\\theta\\_{G}}\\left(I^{LR}\\right)\\right)\\_{x, y}\\right)^{2}$$ \r\n\r\nHere $W\\_{i,j}$ and $H\\_{i,j}$ describe the dimensions of the respective feature maps within the VGG network." + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. 
This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "SRGAN" + full_name: "SRGAN" + description: "**SRGAN** is a generative adversarial network for single image super-resolution. It uses a perceptual loss function which consists of an adversarial loss and a content loss. The adversarial loss pushes the solution to the natural image manifold using a discriminator network that is trained to differentiate between the super-resolved images and original photo-realistic images. In addition, the authors use a content loss motivated by perceptual similarity instead of similarity in pixel space. The actual networks - depicted in the Figure to the right - consist mainly of residual blocks for feature extraction.\r\n\r\nFormally we write the perceptual loss function as a weighted sum of a (VGG) content loss $l^{SR}\\_{X}$ and an adversarial loss component $l^{SR}\\_{Gen}$:\r\n\r\n$$ l^{SR} = l^{SR}\\_{X} + 10^{-3}l^{SR}\\_{Gen} $$" + } + } + video: { + video_id: "nGPMKnoJTcI" + video_title: "PR-030: Photo-Realistic Single Image Super Resolution Using a Generative Adversarial Network" + number_of_likes: 24 + number_of_views: 2797 + published_date: { + seconds: 1502636018 + } + uploader: "Seungil Kim" + } + } +} +pr_id_to_video: { + key: 31 + value: { + pr_id: 31 + papers: { + paper_id: "learning-to-learn-by-gradient-descent-by" + title: "Learning to learn by gradient descent by gradient descent" + arxiv_id: "1606.04474" + abstract: "The move from hand-designed features to learned features in machine learning\nhas been wildly successful. In spite of this, optimization algorithms are still\ndesigned by hand. In this paper we show how the design of an optimization\nalgorithm can be cast as a learning problem, allowing the algorithm to learn to\nexploit structure in the problems of interest in an automatic way. Our learned\nalgorithms, implemented by LSTMs, outperform generic, hand-designed competitors\non the tasks for which they are trained, and also generalize well to new tasks\nwith similar structure. 
We demonstrate this on a number of tasks, including\nsimple convex problems, training neural networks, and styling images with\nneural art." + pub_date: { + seconds: 1465862400 + } + authors: "Marcin Andrychowicz" + authors: "Misha Denil" + authors: "Sergio Gomez" + authors: "Matthew W. Hoffman" + authors: "David Pfau" + authors: "Tom Schaul" + authors: "Brendan Shillingford" + authors: "Nando de Freitas" + repositories: { + is_official: true + url: "https://github.com/deepmind/learning-to-learn" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4020 + description: "Learning to Learn in TensorFlow" + } + repositories: { + url: "https://github.com/chenwydj/learning-to-learn-by-gradient-descent-by-gradient-descent" + framework: FRAMEWORK_PYTORCH + number_of_stars: 31 + description: "Pytorch version of NIPS'16 \"Learning to learn by gradient descent by gradient descent\"" + } + repositories: { + url: "https://github.com/yangsenius/learning-to-learn-by-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 35 + description: "\"Learning to learn by gradient descent by gradient descent \"by PyTorch -- a simple re-implementation." + } + } + video: { + video_id: "p55H46RiZ6k" + video_title: "PR-031: Learning to learn by gradient descent by gradient descent" + number_of_likes: 16 + number_of_views: 2375 + published_date: { + seconds: 1504453983 + } + uploader: "차준범" + } + } +} +pr_id_to_video: { + key: 32 + value: { + pr_id: 32 + papers: { + paper_id: "deep-visual-semantic-alignments-for" + title: "Deep Visual-Semantic Alignments for Generating Image Descriptions" + arxiv_id: "1412.2306" + abstract: "We present a model that generates natural language descriptions of images and\ntheir regions. Our approach leverages datasets of images and their sentence\ndescriptions to learn about the inter-modal correspondences between language\nand visual data. Our alignment model is based on a novel combination of\nConvolutional Neural Networks over image regions, bidirectional Recurrent\nNeural Networks over sentences, and a structured objective that aligns the two\nmodalities through a multimodal embedding. We then describe a Multimodal\nRecurrent Neural Network architecture that uses the inferred alignments to\nlearn to generate novel descriptions of image regions. We demonstrate that our\nalignment model produces state of the art results in retrieval experiments on\nFlickr8K, Flickr30K and MSCOCO datasets. We then show that the generated\ndescriptions significantly outperform retrieval baselines on both full images\nand on a new dataset of region-level annotations." + pub_date: { + seconds: 1417910400 + } + authors: "Andrej Karpathy" + authors: "Li Fei-Fei" + repositories: { + url: "https://github.com/IzabelaKrupinska/PROJBAD" + framework: FRAMEWORK_OTHERS + description: "Pliki do projektu badawczego." 
+ } + repositories: { + url: "https://github.com/VinitSR7/Image-Caption-Generation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 12 + description: "Image Captioning: Implementing the Neural Image Caption Generator" + } + } + video: { + video_id: "Q-Cm7nw85iE" + video_title: "PR-032: Deep Visual-Semantic Alignments for Generating Image Descriptions" + number_of_likes: 13 + number_of_views: 2031 + published_date: { + seconds: 1504445734 + } + uploader: "Jiyang Kang" + } + } +} +pr_id_to_video: { + key: 33 + value: { + pr_id: 33 + papers: { + paper_id: "pvanet-lightweight-deep-neural-networks-for" + title: "PVANet: Lightweight Deep Neural Networks for Real-time Object Detection" + arxiv_id: "1611.08588" + abstract: "In object detection, reducing computational cost is as important as improving\naccuracy for most practical usages. This paper proposes a novel network\nstructure, which is an order of magnitude lighter than other state-of-the-art\nnetworks while maintaining the accuracy. Based on the basic principle of more\nlayers with less channels, this new deep neural network minimizes its\nredundancy by adopting recent innovations including C.ReLU and Inception\nstructure. We also show that this network can be trained efficiently to achieve\nsolid results on well-known object detection benchmarks: 84.9% and 84.2% mAP on\nVOC2007 and VOC2012 while the required compute is less than 10% of the recent\nResNet-101." + pub_date: { + seconds: 1479859200 + } + authors: "Sanghoon Hong" + authors: "Byungseok Roh" + authors: "Kye-Hyeon Kim" + authors: "Yeongjae Cheon" + authors: "Minje Park" + repositories: { + is_official: true + url: "https://github.com/sanghoon/pva-faster-rcnn" + framework: FRAMEWORK_OTHERS + number_of_stars: 656 + description: "Demo code for PVANet" + } + repositories: { + url: "https://github.com/busyboxs/Some-resources-useful-for-me" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/jeffshih/autoTrain" + framework: FRAMEWORK_OTHERS + description: "training tool for faster rcnn" + } + repositories: { + url: "https://github.com/wuyx/pva-faster-rcnn" + framework: FRAMEWORK_OTHERS + } + } + video: { + video_id: "TYDGTnxUGHQ" + video_title: "PR-033: PVANet: Lightweight Deep Neural Networks for Real-time Object Detection" + number_of_likes: 25 + number_of_views: 3382 + published_date: { + seconds: 1504446966 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 34 + value: { + pr_id: 34 + papers: { + paper_id: "xception-deep-learning-with-depthwise" + title: "Xception: Deep Learning with Depthwise Separable Convolutions" + arxiv_id: "1610.02357" + abstract: "We present an interpretation of Inception modules in convolutional neural\nnetworks as being an intermediate step in-between regular convolution and the\ndepthwise separable convolution operation (a depthwise convolution followed by\na pointwise convolution). In this light, a depthwise separable convolution can\nbe understood as an Inception module with a maximally large number of towers.\nThis observation leads us to propose a novel deep convolutional neural network\narchitecture inspired by Inception, where Inception modules have been replaced\nwith depthwise separable convolutions. 
We show that this architecture, dubbed\nXception, slightly outperforms Inception V3 on the ImageNet dataset (which\nInception V3 was designed for), and significantly outperforms Inception V3 on a\nlarger image classification dataset comprising 350 million images and 17,000\nclasses. Since the Xception architecture has the same number of parameters as\nInception V3, the performance gains are not due to increased capacity but\nrather to a more efficient use of model parameters." + pub_date: { + seconds: 1475798400 + } + authors: "François Chollet" + repositories: { + url: "https://github.com/osmr/imgclsmob" + framework: FRAMEWORK_OTHERS + number_of_stars: 2202 + description: "Sandbox for training deep learning networks" + } + repositories: { + url: "https://github.com/tensorflow/models/tree/master/research/deeplab" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70341 + description: "Models and examples built with TensorFlow" + } + repositories: { + url: "https://github.com/rwightman/pytorch-image-models" + framework: FRAMEWORK_PYTORCH + number_of_stars: 11097 + description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleClas" + framework: FRAMEWORK_OTHERS + number_of_stars: 1547 + description: "A treasure chest for visual recognition powered by PaddlePaddle" + } + repositories: { + url: "https://github.com/amogh7joshi/engagement-detection" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 + description: "Engagement Detection, including facial detection and emotion recognition, using CNNs/LSTMs." + } + repositories: { + url: "https://github.com/amogh7joshi/fer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 + description: "Engagement Detection, including facial detection and emotion recognition, using CNNs/LSTMs." + } + repositories: { + url: "https://github.com/ced-kin/dog-breed-ai" + framework: FRAMEWORK_TENSORFLOW + description: "android application for classifying dog breeds" + } + repositories: { + url: "https://github.com/krishnakarthi/COVID-19_Prediction" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Diagnose the COVID-19 from patient’s chest X-ray's using Convolution Neural Networks (CNN) Deep Transfer Learning technique in Azure ML workspace" + } + repositories: { + url: "https://github.com/bluejurand/Photos-colorization" + framework: FRAMEWORK_TENSORFLOW + description: "Keras repository which colorize black-white images." + } + repositories: { + url: "https://github.com/zotrick/Pneumonia_classification_Xception" + framework: FRAMEWORK_TENSORFLOW + description: "This projects uses Xception CNN for pneumonia classification with competitive results." + } + methods: { + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. 
It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. 
It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + methods: { + name: "Step Decay" + full_name: "Step Decay" + description: "**Step Decay** is a learning rate schedule that drops the learning rate by a factor every few epochs, where the number of epochs is a hyperparameter.\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" + } + methods: { + name: "SGD with Momentum" + full_name: "SGD with Momentum" + description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" + } + } + video: { + video_id: "V0dLhyg5_Dw" + video_title: "PR-034: Inception and Xception" + number_of_likes: 79 + number_of_views: 10208 + published_date: { + seconds: 1505052461 + } + uploader: "Jaejun Yoo" + } + } +} +pr_id_to_video: { + key: 35 + value: { + pr_id: 35 + papers: { + paper_id: "understanding-black-box-predictions-via" + title: "Understanding Black-box Predictions via Influence Functions" + arxiv_id: "1703.04730" + abstract: "How can we explain the predictions of a black-box model? In this paper, we use influence functions -- a classic technique from robust statistics -- to trace a model's prediction through the learning algorithm and back to its training data, thereby identifying training points most responsible for a given prediction. 
To scale up influence functions to modern machine learning settings, we develop a simple, efficient implementation that requires only oracle access to gradients and Hessian-vector products. We show that even on non-convex and non-differentiable models where the theory breaks down, approximations to influence functions can still provide valuable information. On linear models and convolutional neural networks, we demonstrate that influence functions are useful for multiple purposes: understanding model behavior, debugging models, detecting dataset errors, and even creating visually-indistinguishable training-set attacks." + pub_date: { + seconds: 1489449600 + } + authors: "Pang Wei Koh" + authors: "Percy Liang" + repositories: { + url: "https://github.com/4pygmalion/Federated_learning-filtering-non-influence-data" + framework: FRAMEWORK_TENSORFLOW + description: "Federated learning with influence function" + } + repositories: { + url: "https://github.com/nimarb/pytorch_influence_functions" + framework: FRAMEWORK_PYTORCH + number_of_stars: 121 + description: "This is a PyTorch reimplementation of Influence Functions from the ICML2017 best paper: Understanding Black-box Predictions via Influence Functions by Pang Wei Koh and Percy Liang." + } + repositories: { + url: "https://github.com/kohpangwei/influence-release" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 581 + } + repositories: { + is_official: true + url: "https://worksheets.codalab.org/worksheets/0x2b314dc3536b482dbba02783a24719fd" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/Timothy-Ye/example-based-explanation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "CST Part II Project: Example Based Explanation in Machine Learning" + } + repositories: { + url: "https://github.com/Shmoo137/Interpretable-Phase-Classification" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 + description: "The repository accompanying the research paper \"Phase Detection with Neural Networks: Interpreting the Black Box\" by A. Dawid, P. Huembeli, M. Tomza, M. Lewenstein, and A. Dauphin" + } + repositories: { + url: "https://github.com/TooTouch/WhiteBox-Part2" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "The White Box Project is a project that introduces many ways to solve the part of the black box of machine learning. This project is based on Interpretable Machine Learning by Christoph Molnar. I recommend you to read the book first and practice this project." 
+ } + repositories: { + url: "https://github.com/bsharchilev/influence_boosting" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 54 + description: "Supporting code for the paper \"Finding Influential Training Samples for Gradient Boosted Decision Trees\"" + } + repositories: { + url: "https://github.com/ShinKyuY/Understanding-Black-box-Predictions-via-Influence-Functions-tutorial-MNIST-7-vs-1-Classification" + framework: FRAMEWORK_OTHERS + number_of_stars: 8 + description: "Tiny Tutorial on https://arxiv.org/abs/1703.04730" + } + repositories: { + url: "https://github.com/darkonhub/darkon" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 234 + description: "Toolkit to Hack Your Deep Learning Models" + } + } + video: { + video_id: "xlmlY8WHjkU" + video_title: "PR-035: Understanding Black-box Predictions via Influence Functions (2017)" + number_of_likes: 26 + number_of_views: 3395 + published_date: { + seconds: 1505051523 + } + uploader: "Terry TaeWoong Um" + } + } +} +pr_id_to_video: { + key: 36 + value: { + pr_id: 36 + papers: { + paper_id: "learning-to-remember-rare-events" + title: "Learning to Remember Rare Events" + arxiv_id: "1703.03129" + abstract: "Despite recent advances, memory-augmented deep neural networks are still\nlimited when it comes to life-long and one-shot learning, especially in\nremembering rare events. We present a large-scale life-long memory module for\nuse in deep learning. The module exploits fast nearest-neighbor algorithms for\nefficiency and thus scales to large memory sizes. Except for the\nnearest-neighbor query, the module is fully differentiable and trained\nend-to-end with no extra supervision. It operates in a life-long manner, i.e.,\nwithout the need to reset it during training.\n Our memory module can be easily added to any part of a supervised neural\nnetwork. To show its versatility we add it to a number of networks, from simple\nconvolutional ones tested on image classification to deep sequence-to-sequence\nand recurrent-convolutional models. In all cases, the enhanced network gains\nthe ability to remember and do life-long one-shot learning. Our module\nremembers training examples shown many thousands of steps in the past and it\ncan successfully generalize from them. We set new state-of-the-art for one-shot\nlearning on the Omniglot dataset and demonstrate, for the first time, life-long\none-shot learning in recurrent neural networks on a large-scale machine\ntranslation task." + pub_date: { + seconds: 1489017600 + } + authors: "Łukasz Kaiser" + authors: "Ofir Nachum" + authors: "Aurko Roy" + authors: "Samy Bengio" + repositories: { + is_official: true + url: "https://github.com/tensorflow/models" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70339 + description: "Models and examples built with TensorFlow" + } + } + video: { + video_id: "S_fbBYbXypc" + video_title: "PR-036: Learning to Remember Rare Events" + number_of_likes: 7 + number_of_views: 1488 + published_date: { + seconds: 1505657142 + } + uploader: "Taegyun Jeon" + } + } +} +pr_id_to_video: { + key: 37 + value: { + pr_id: 37 + papers: { + paper_id: "ask-me-anything-dynamic-memory-networks-for" + title: "Ask Me Anything: Dynamic Memory Networks for Natural Language Processing" + arxiv_id: "1506.07285" + abstract: "Most tasks in natural language processing can be cast into question answering\n(QA) problems over language input. 
We introduce the dynamic memory network\n(DMN), a neural network architecture which processes input sequences and\nquestions, forms episodic memories, and generates relevant answers. Questions\ntrigger an iterative attention process which allows the model to condition its\nattention on the inputs and the result of previous iterations. These results\nare then reasoned over in a hierarchical recurrent sequence model to generate\nanswers. The DMN can be trained end-to-end and obtains state-of-the-art results\non several types of tasks and datasets: question answering (Facebook's bAbI\ndataset), text classification for sentiment analysis (Stanford Sentiment\nTreebank) and sequence modeling for part-of-speech tagging (WSJ-PTB). The\ntraining for these different tasks relies exclusively on trained word vector\nrepresentations and input-question-answer triplets." + pub_date: { + seconds: 1435104000 + } + authors: "Ankit Kumar" + authors: "Ozan Irsoy" + authors: "Peter Ondruska" + authors: "Mohit Iyyer" + authors: "James Bradbury" + authors: "Ishaan Gulrajani" + authors: "Victor Zhong" + authors: "Romain Paulus" + authors: "Richard Socher" + repositories: { + url: "https://github.com/DongjunLee/dmn-tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 41 + description: "TensorFlow implementation of 'Ask Me Anything: Dynamic Memory Networks for Natural Language Processing (2015)'" + } + repositories: { + url: "https://github.com/scakc/QAwiki" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Question Answering models that can get an answer from Wikipedia pages and select a sentence subset as a reply to your question." + } + repositories: { + url: "https://github.com/navodhya/DMN" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/macco3k/deepstories" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/radiodee1/awesome-chatbot" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 13 + description: "work in progress - python Keras, Tensorflow, or Pytorch implementation of a chatbot or possibly smart-speaker" + } + repositories: { + url: "https://github.com/rgsachin/DMTN" + framework: FRAMEWORK_OTHERS + number_of_stars: 13 + } + repositories: { + url: "https://github.com/Asteur/someChatbot" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/jxz542189/dmn_plus" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Dynamic memory network tensorflow tf.data tf.estimator" + } + repositories: { + url: "https://github.com/ajenningsfrankston/Dynamic-Memory-Network-Plus-master" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/vchudinov/dynamic_memory_networks_with_keras" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "Keras implementation of the dynamic memory networks from https://arxiv.org/pdf/1603.01417.pdf" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "GRU" + full_name: "Gated Recurrent Unit" + description: "A **Gated Recurrent Unit**, or **GRU**, is a type of recurrent neural network. 
It is similar to an [LSTM](https://paperswithcode.com/method/lstm), but only has two gates - a reset gate and an update gate - and notably lacks an output gate. Fewer parameters means GRUs are generally easier/faster to train than their LSTM counterparts.\r\n\r\nImage Source: [here](https://www.google.com/url?sa=i&url=https%3A%2F%2Fcommons.wikimedia.org%2Fwiki%2FFile%3AGated_Recurrent_Unit%2C_type_1.svg&psig=AOvVaw3EmNX8QXC5hvyxeenmJIUn&ust=1590332062671000&source=images&cd=vfe&ved=0CA0QjhxqFwoTCMiev9-eyukCFQAAAAAdAAAAABAR)" + } + methods: { + name: "Dynamic Memory Network" + full_name: "Dynamic Memory Network" + description: "A **Dynamic Memory Network** is a neural network architecture which processes input sequences and questions, forms episodic memories, and generates relevant answers. Questions trigger an iterative attention process which allows the model to condition its attention on the inputs and the result of previous iterations. These results are then reasoned over in a hierarchical recurrent sequence model to generate answers. \r\n\r\nThe DMN consists of a number of modules:\r\n\r\n- Input Module: The input module encodes raw text inputs from the task into distributed vector representations. The input takes forms like a sentence, a long story, a movie review and so on.\r\n- Question Module: The question module encodes the question of the task into a distributed\r\nvector representation. For question answering, the question may be a sentence such as \"Where did the author first fly?\". The representation is fed into the episodic memory module, and forms the basis, or initial state, upon which the episodic memory module iterates.\r\n- Episodic Memory Module: Given a collection of input representations, the episodic memory module chooses which parts of the inputs to focus on through the attention mechanism. It then produces a ”memory” vector representation taking into account the question as well as the previous memory. Each iteration provides the module with newly relevant information about the input. In other words,\r\nthe module has the ability to retrieve new information, in the form of input representations, which were thought to be irrelevant in previous iterations.\r\n- Answer Module: The answer module generates an answer from the final memory vector of the memory module." + } + } + video: { + video_id: "oxSrjuspQEs" + video_title: "PR-037: Ask me anything: Dynamic memory networks for natural language processing" + number_of_likes: 24 + number_of_views: 2364 + published_date: { + seconds: 1505654553 + } + uploader: "keun bong Kwak" + } + } +} +pr_id_to_video: { + key: 38 + value: { + pr_id: 38 + papers: { + paper_id: "explaining-and-harnessing-adversarial" + title: "Explaining and Harnessing Adversarial Examples" + arxiv_id: "1412.6572" + abstract: "Several machine learning models, including neural networks, consistently\nmisclassify adversarial examples---inputs formed by applying small but\nintentionally worst-case perturbations to examples from the dataset, such that\nthe perturbed input results in the model outputting an incorrect answer with\nhigh confidence. Early attempts at explaining this phenomenon focused on\nnonlinearity and overfitting. We argue instead that the primary cause of neural\nnetworks' vulnerability to adversarial perturbation is their linear nature.\nThis explanation is supported by new quantitative results while giving the\nfirst explanation of the most intriguing fact about them: their generalization\nacross architectures and training sets. 
Moreover, this view yields a simple and\nfast method of generating adversarial examples. Using this approach to provide\nexamples for adversarial training, we reduce the test set error of a maxout\nnetwork on the MNIST dataset." + pub_date: { + seconds: 1419033600 + } + authors: "Ian J. Goodfellow" + authors: "Jonathon Shlens" + authors: "Christian Szegedy" + repositories: { + url: "https://github.com/anirudh9784/Adversarial-Defense" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } + repositories: { + url: "https://github.com/anirudh9784/Major_Project" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } + repositories: { + url: "https://github.com/openai/cleverhans" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5156 + description: "An adversarial example library for constructing attacks, building defenses, and benchmarking both" + } + repositories: { + url: "https://github.com/cleverhans-lab/cleverhans" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5156 + description: "An adversarial example library for constructing attacks, building defenses, and benchmarking both" + } + repositories: { + url: "https://github.com/dunky11/adversarial-frontier-stitching" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 + description: "Implementation of \"Adversarial Frontier Stitching for Remote Neural Network Watermarking\" in TensorFlow." + } + repositories: { + url: "https://github.com/Jupetus/ExplainableAI" + framework: FRAMEWORK_PYTORCH + description: "Collection of ways to explain NN outputs" + } + repositories: { + url: "https://github.com/pwj1996/mycleverhans" + framework: FRAMEWORK_TENSORFLOW + description: "修改的cleverhans框架" + } + repositories: { + url: "https://github.com/SifatMd/Research-Papers" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/axelbrando/Mixture-Density-Networks-for-distribution-and-uncertainty-estimation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 260 + description: "A generic Mixture Density Networks (MDN) implementation for distribution and uncertainty estimation by using Keras (TensorFlow)" + } + repositories: { + url: "https://github.com/winycg/HCGNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 25 + description: "[AAAI-2020] Official implementations of HCGNets: Gated Convolutional Networks with Hybrid Connectivity for Image Classification" + } + } + video: { + video_id: "7hRO2bS810M" + video_title: "PR-038: Explaining and Harnessing Adversarial Examples" + number_of_likes: 7 + number_of_views: 1540 + published_date: { + seconds: 1507170279 + } + uploader: "Suh Kiho" + } + } +} +pr_id_to_video: { + key: 39 + value: { + pr_id: 39 + papers: { + paper_id: "dropout-as-a-bayesian-approximation" + title: "Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning" + arxiv_id: "1506.02142" + abstract: "Deep learning tools have gained tremendous attention in applied machine\nlearning. However such tools for regression and classification do not capture\nmodel uncertainty. In comparison, Bayesian models offer a mathematically\ngrounded framework to reason about model uncertainty, but usually come with a\nprohibitive computational cost. In this paper we develop a new theoretical\nframework casting dropout training in deep neural networks (NNs) as approximate\nBayesian inference in deep Gaussian processes. 
A direct result of this theory\ngives us tools to model uncertainty with dropout NNs -- extracting information\nfrom existing models that has been thrown away so far. This mitigates the\nproblem of representing uncertainty in deep learning without sacrificing either\ncomputational complexity or test accuracy. We perform an extensive study of the\nproperties of dropout's uncertainty. Various network architectures and\nnon-linearities are assessed on tasks of regression and classification, using\nMNIST as an example. We show a considerable improvement in predictive\nlog-likelihood and RMSE compared to existing state-of-the-art methods, and\nfinish by using dropout's uncertainty in deep reinforcement learning." + pub_date: { + seconds: 1433548800 + } + authors: "Yarin Gal" + authors: "Zoubin Ghahramani" + repositories: { + url: "https://github.com/cdebeunne/uncertainties_CNN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "A repo for toy examples to test uncertainties estimation of neural networks" + } + repositories: { + url: "https://github.com/asharakeh/probdet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 29 + description: "Code for \"Estimating and Evaluating Regression Predictive Uncertainty in Deep Object Detectors.\" (ICLR 2021)" + } + repositories: { + url: "https://github.com/erickgalinkin/dropout_privacy" + framework: FRAMEWORK_TENSORFLOW + description: "Project repository for Drexel CS590 " + } + repositories: { + url: "https://github.com/MayarLotfy/bayesianNN" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/arodriguezca/uncertainty-ts-forecasting" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/aredier/monte_carlo_dropout" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "using monte carlo dropout to have uncertainty estimation of predictions" + } + repositories: { + url: "https://github.com/agnesdeng/misle" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Multiple imputation through statistical learning" + } + repositories: { + url: "https://github.com/gtegner/hyper-gan" + framework: FRAMEWORK_PYTORCH + description: "Uncertainty Estimation with HyperGANS in PyTorch!" + } + repositories: { + url: "https://github.com/marcovirgolin/UncertaintyEstimationInDeepNets" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Attempt to reproduce the toy experiment of http://bit.ly/2C9Z8St with an ensemble of nets and with dropout." + } + repositories: { + url: "https://github.com/jelleman8/TractSeg" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + methods: { + name: "Monte Carlo Dropout" + full_name: "Monte Carlo Dropout" + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." 
+ } + } + video: { + video_id: "aU91bDGmy7I" + video_title: "PR-039: Dropout as a Bayesian approximation" + number_of_likes: 56 + number_of_views: 5082 + published_date: { + seconds: 1508076910 + } + uploader: "차준범" + } + } +} +pr_id_to_video: { + key: 40 + value: { + pr_id: 40 + papers: { + paper_id: "wavenet-a-generative-model-for-raw-audio" + title: "WaveNet: A Generative Model for Raw Audio" + arxiv_id: "1609.03499" + abstract: "This paper introduces WaveNet, a deep neural network for generating raw audio\nwaveforms. The model is fully probabilistic and autoregressive, with the\npredictive distribution for each audio sample conditioned on all previous ones;\nnonetheless we show that it can be efficiently trained on data with tens of\nthousands of samples per second of audio. When applied to text-to-speech, it\nyields state-of-the-art performance, with human listeners rating it as\nsignificantly more natural sounding than the best parametric and concatenative\nsystems for both English and Mandarin. A single WaveNet can capture the\ncharacteristics of many different speakers with equal fidelity, and can switch\nbetween them by conditioning on the speaker identity. When trained to model\nmusic, we find that it generates novel and often highly realistic musical\nfragments. We also show that it can be employed as a discriminative model,\nreturning promising results for phoneme recognition." + pub_date: { + seconds: 1473638400 + } + authors: "Aaron van den Oord" + authors: "Sander Dieleman" + authors: "Heiga Zen" + authors: "Karen Simonyan" + authors: "Oriol Vinyals" + authors: "Alex Graves" + authors: "Nal Kalchbrenner" + authors: "Andrew Senior" + authors: "Koray Kavukcuoglu" + repositories: { + url: "https://github.com/pbrandl/aNN_Audio" + framework: FRAMEWORK_PYTORCH + description: "Digital twin of analog audio distortion devices (WavNet based)." + } + repositories: { + url: "https://github.com/ibab/tensorflow-wavenet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5127 + description: "A TensorFlow implementation of DeepMind's WaveNet paper" + } + repositories: { + url: "https://github.com/otosense/slang" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "A light weight version of Slang: Tools to build a language of sound." 
+ } + repositories: { + url: "https://github.com/isadrtdinov/wavenet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "WaveNet vocoder implementation for speech synthesis task" + } + repositories: { + url: "https://github.com/AI-Huang/WaveNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "Keras and PyTorch implementations for Google's WaveNet" + } + repositories: { + url: "https://github.com/stdereka/liverpool-ion-switching" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 13 + description: "Liverpool Ion Switching kaggle competition 2nd place winning solution" + } + repositories: { + url: "https://github.com/pascalbakker/WaveNet-Implementation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Tensorflow implementation of Wavenet architecture " + } + repositories: { + url: "https://github.com/randomrandom/deep-atrous-cnn-sentiment" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 65 + description: "Deep-Atrous-CNN-Text-Network: End-to-end word level model for sentiment analysis and other text classifications" + } + repositories: { + url: "https://github.com/sriharireddypusapati/speech-to-text-wavenet2" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/thorwhalen/slang" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "A light weight version of Slang: Tools to build a language of sound." + } + methods: { + name: "WaveNet" + full_name: "WaveNet" + description: "**WaveNet** is an audio generative model based on the [PixelCNN](https://paperswithcode.com/method/pixelcnn) architecture. In order to deal with long-range temporal dependencies needed for raw audio generation, architectures are developed based on dilated causal convolutions, which exhibit very large receptive fields.\r\n\r\nThe joint probability of a waveform $\\vec{x} = \\{ x_1, \\dots, x_T \\}$ is factorised as a product of conditional probabilities as follows:\r\n\r\n$$p\\left(\\vec{x}\\right) = \\prod_{t=1}^{T} p\\left(x_t \\mid x_1, \\dots ,x_{t-1}\\right)$$\r\n\r\nEach audio sample $x_t$ is therefore conditioned on the samples at all previous timesteps." + } + methods: { + name: "Dilated Causal Convolution" + full_name: "Dilated Causal Convolution" + description: "A **Dilated Causal Convolution** is a causal convolution where the filter is applied over an area larger than its length by skipping input values with a certain step. A dilated causal convolution effectively allows the network to have very large receptive fields with just a few layers." + } + methods: { + name: "Causal Convolution" + full_name: "Causal Convolution" + description: "**Causal convolutions** are a type of convolution used for temporal data which ensures the model cannot violate the ordering in which we model the data: the prediction $p(x_{t+1} | x_{1}, \\ldots, x_{t})$ emitted by the model at timestep $t$ cannot depend on any of the future timesteps $x_{t+1}, x_{t+2}, \\ldots, x_{T}$. For images, the equivalent of a causal convolution is a masked convolution which can be implemented by constructing a mask tensor and doing an element-wise multiplication of this mask with the convolution kernel before applying it. For 1-D data such as audio one can more easily implement this by shifting the output of a normal convolution by a few timesteps." 
+ } + methods: { + name: "Mixture of Logistic Distributions" + full_name: "Mixture of Logistic Distributions" + description: "**Mixture of Logistic Distributions (MoL)** is a type of output function, and an alternative to a [softmax](https://paperswithcode.com/method/softmax) layer. Discretized logistic mixture likelihood is used in PixelCNN++ and [WaveNet](https://paperswithcode.com/method/wavenet) to predict discrete values.\r\n\r\nImage Credit: [Hao Gao](https://medium.com/@smallfishbigsea/an-explanation-of-discretized-logistic-mixture-likelihood-bdfe531751f0)" + } + } + video: { + video_id: "GyQnex_DK2k" + video_title: "PR-040: WaveNet - A Generative Model for Raw Audio" + number_of_likes: 63 + number_of_views: 7143 + published_date: { + seconds: 1508077701 + } + uploader: "Seungil Kim" + } + } +} +pr_id_to_video: { + key: 41 + value: { + pr_id: 41 + papers: { + paper_id: "show-and-tell-a-neural-image-caption" + title: "Show and Tell: A Neural Image Caption Generator" + arxiv_id: "1411.4555" + abstract: "Automatically describing the content of an image is a fundamental problem in\nartificial intelligence that connects computer vision and natural language\nprocessing. In this paper, we present a generative model based on a deep\nrecurrent architecture that combines recent advances in computer vision and\nmachine translation and that can be used to generate natural sentences\ndescribing an image. The model is trained to maximize the likelihood of the\ntarget description sentence given the training image. Experiments on several\ndatasets show the accuracy of the model and the fluency of the language it\nlearns solely from image descriptions. Our model is often quite accurate, which\nwe verify both qualitatively and quantitatively. For instance, while the\ncurrent state-of-the-art BLEU-1 score (the higher the better) on the Pascal\ndataset is 25, our approach yields 59, to be compared to human performance\naround 69. We also show BLEU-1 score improvements on Flickr30k, from 56 to 66,\nand on SBU, from 19 to 28. Lastly, on the newly released COCO dataset, we\nachieve a BLEU-4 of 27.7, which is the current state-of-the-art." + pub_date: { + seconds: 1416182400 + } + authors: "Oriol Vinyals" + authors: "Alexander Toshev" + authors: "Samy Bengio" + authors: "Dumitru Erhan" + repositories: { + url: "https://github.com/supreethub/Image-Captioning" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "A complete pipeline of Convolutional Neural Networks (CNN) and Recurrent Neural Networks (RNN) knowledge to build a deep learning model that produces captions given an input image." + } + repositories: { + url: "https://github.com/jelifysh/Image-Captioning" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 16 + description: "Implemented 3 different architectures to tackle the Image Caption problem, i.e, Merged Encoder-Decoder - Bahdanau Attention - Transformers" + } + repositories: { + url: "https://github.com/juletx/image-caption-generation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Automatic Image Caption Generation model that uses a CNN to condition a LSTM based language model" + } + repositories: { + url: "https://github.com/Djmcflush/Quantum-Hackathon" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/sd2001/Image2Caption" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "🎥Image2Caption🔤: Upload an image and let the model generate a caption for you🤖." 
+ } + repositories: { + url: "https://github.com/sd2001/Auto-Image2Caption" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "🎥Image2Caption🔤: Upload an image and let the model generate a caption for you🤖." + } + repositories: { + url: "https://github.com/Tamim-MR14/Image_Caption_Generator" + framework: FRAMEWORK_PYTORCH + description: "Project Done as a part of requirements of Graduation of Udacity computer Vision Nanodegree" + } + repositories: { + url: "https://github.com/simnyatsanga/image-caption-generator" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Image Caption Generators in TensorFlow and Keras" + } + repositories: { + url: "https://github.com/neerav47/Image-Captioning" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7 + description: "Complete pipeline to predict captions for a given image." + } + repositories: { + url: "https://github.com/atharv6/Image-Captioning" + framework: FRAMEWORK_PYTORCH + description: "Generating Captions from Images" + } + } + video: { + video_id: "BrmCnoYhQb4" + video_title: "PR-041: Show and Tell: A Neural Image Caption Generator" + number_of_likes: 26 + number_of_views: 4494 + published_date: { + seconds: 1508678893 + } + uploader: "Jiyang Kang" + } + } +} +pr_id_to_video: { + key: 42 + value: { + pr_id: 42 + papers: { + paper_id: "adam-a-method-for-stochastic-optimization" + title: "Adam: A Method for Stochastic Optimization" + arxiv_id: "1412.6980" + abstract: "We introduce Adam, an algorithm for first-order gradient-based optimization\nof stochastic objective functions, based on adaptive estimates of lower-order\nmoments. The method is straightforward to implement, is computationally\nefficient, has little memory requirements, is invariant to diagonal rescaling\nof the gradients, and is well suited for problems that are large in terms of\ndata and/or parameters. The method is also appropriate for non-stationary\nobjectives and problems with very noisy and/or sparse gradients. The\nhyper-parameters have intuitive interpretations and typically require little\ntuning. Some connections to related algorithms, on which Adam was inspired, are\ndiscussed. We also analyze the theoretical convergence properties of the\nalgorithm and provide a regret bound on the convergence rate that is comparable\nto the best known results under the online convex optimization framework.\nEmpirical results demonstrate that Adam works well in practice and compares\nfavorably to other stochastic optimization methods. Finally, we discuss AdaMax,\na variant of Adam based on the infinity norm." + pub_date: { + seconds: 1419206400 + } + authors: "Diederik P. 
Kingma" + authors: "Jimmy Ba" + repositories: { + url: "https://github.com/vanyle/vlearn" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "A machine learning framework written in C++ designed for distributed computing " + } + repositories: { + url: "https://github.com/joseluis1061/neuralnilm" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Neural nilm python 3.3" + } + repositories: { + url: "https://github.com/chuiyunjun/projectCSC413" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/nnaisense/pgpelib" + framework: FRAMEWORK_PYTORCH + number_of_stars: 22 + description: "A mini library for Policy Gradients with Parameter-based Exploration, with reference implementation of the ClipUp optimizer (https://arxiv.org/abs/2008.02387) from NNAISENSE." + } + repositories: { + url: "https://github.com/lab-ml/nn/tree/master/labml_nn/optimizers" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3069 + description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + } + repositories: { + url: "https://github.com/offscale/cdd-python" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Open API to/fro routes, models, and tests. Convert between docstrings, classes, methods, argparse, and SQLalchemy." + } + repositories: { + url: "https://github.com/SamuelMarks/doctrans" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Open API to/fro routes, models, and tests. Convert between docstrings, classes, methods, argparse, and SQLalchemy." + } + repositories: { + url: "https://github.com/safakkbilici/Academic-Paper-Title-Recommendation" + framework: FRAMEWORK_OTHERS + number_of_stars: 12 + description: "Supervised text summarization (title generation/recommendation) based on academic paper abstracts, with Seq2Seq LSTM and the power of Transfer Learning and T5." + } + repositories: { + url: "https://github.com/JaneliaSciComp/SongExplorer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "deep learning for acoustic signals" + } + repositories: { + url: "https://github.com/zhuchen03/maxva" + framework: FRAMEWORK_PYTORCH + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD with Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. 
$ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "AdaMax" + full_name: "AdaMax" + description: "**AdaMax** is a generalisation of [Adam](https://paperswithcode.com/method/adam) from the $l\\_{2}$ norm to the $l\\_{\\infty}$ norm. Define:\r\n\r\n$$ u\\_{t} = \\beta^{\\infty}\\_{2}v\\_{t-1} + \\left(1-\\beta^{\\infty}\\_{2}\\right)|g\\_{t}|^{\\infty}$$\r\n\r\n$$ = \\max\\left(\\beta\\_{2}\\cdot{v}\\_{t-1}, |g\\_{t}|\\right)$$\r\n\r\nWe can plug into the Adam update equation by replacing $\\sqrt{\\hat{v}_{t} + \\epsilon}$ with $u\\_{t}$ to obtain the AdaMax update rule:\r\n\r\n$$ \\theta\\_{t+1} = \\theta\\_{t} - \\frac{\\eta}{u\\_{t}}\\hat{m}\\_{t} $$\r\n\r\nCommon default values are $\\eta = 0.002$ and $\\beta\\_{1}=0.9$ and $\\beta\\_{2}=0.999$." + } + } + video: { + video_id: "KN120w3PZIA" + video_title: "PR-042: Adam: A Method for Stochastic Optimization" + number_of_likes: 39 + number_of_views: 4167 + published_date: { + seconds: 1508682336 + } + uploader: "Ji-Hoon Kim" + } + } +} +pr_id_to_video: { + key: 43 + value: { + pr_id: 43 + papers: { + paper_id: "hypernetworks" + title: "HyperNetworks" + arxiv_id: "1609.09106" + abstract: "This work explores hypernetworks: an approach of using a one network, also\nknown as a hypernetwork, to generate the weights for another network.\nHypernetworks provide an abstraction that is similar to what is found in\nnature: the relationship between a genotype - the hypernetwork - and a\nphenotype - the main network. Though they are also reminiscent of HyperNEAT in\nevolution, our hypernetworks are trained end-to-end with backpropagation and\nthus are usually faster. The focus of this work is to make hypernetworks useful\nfor deep convolutional networks and long recurrent networks, where\nhypernetworks can be viewed as relaxed form of weight-sharing across layers.\nOur main result is that hypernetworks can generate non-shared weights for LSTM\nand achieve near state-of-the-art results on a variety of sequence modelling\ntasks including character-level language modelling, handwriting generation and\nneural machine translation, challenging the weight-sharing paradigm for\nrecurrent networks. Our results also show that hypernetworks applied to\nconvolutional networks still achieve respectable results for image recognition\ntasks compared to state-of-the-art baseline models while requiring fewer\nlearnable parameters." + pub_date: { + seconds: 1474934400 + } + authors: "David Ha" + authors: "Andrew Dai" + authors: "Quoc V. Le" + repositories: { + is_official: true + url: "https://github.com/hardmaru/supercell" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 182 + description: "supercell" + } + repositories: { + url: "https://github.com/lab-ml/nn" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3070 + description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + } + repositories: { + url: "https://github.com/gtegner/hyper-gan" + framework: FRAMEWORK_PYTORCH + description: "Uncertainty Estimation with HyperGANS in PyTorch!" 
+ } + repositories: { + url: "https://github.com/gahaalt/continual-learning-with-hypernets" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + } + repositories: { + url: "https://github.com/g1910/HyperNetworks" + framework: FRAMEWORK_PYTORCH + number_of_stars: 113 + description: "PyTorch implementation of HyperNetworks (Ha et al., ICLR 2017) for ResNet (Residual Networks)" + } + repositories: { + url: "https://github.com/gahaalt/continual-learning-overview" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + } + methods: { + name: "HyperNetwork" + full_name: "HyperNetwork" + description: "A **HyperNetwork** is a network that generates a network for a main network. The behavior of the main network is the same with any usual neural network: it learns to map some raw inputs to their desired targets; whereas the hypernetwork takes a set of inputs that contain information about the structure of the weights and generates the weight for that layer." + } + } + video: { + video_id: "-tUQXSdEsMk" + video_title: "PR-043: HyperNetworks" + number_of_likes: 13 + number_of_views: 1681 + published_date: { + seconds: 1509287449 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 44 + value: { + pr_id: 44 + papers: { + paper_id: "mobilenets-efficient-convolutional-neural" + title: "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" + arxiv_id: "1704.04861" + abstract: "We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization." + pub_date: { + seconds: 1492387200 + } + authors: "Andrew G. Howard" + authors: "Menglong Zhu" + authors: "Bo Chen" + authors: "Dmitry Kalenichenko" + authors: "Weijun Wang" + authors: "Tobias Weyand" + authors: "Marco Andreetto" + authors: "Hartwig Adam" + repositories: { + url: "https://github.com/prasadji/Flower-Classifaction-with-Fine-Tuned-Mobilenet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/osmr/imgclsmob" + framework: FRAMEWORK_OTHERS + number_of_stars: 2202 + description: "Sandbox for training deep learning networks" + } + repositories: { + url: "https://github.com/akrapukhin/MobileNetV3" + framework: FRAMEWORK_PYTORCH + description: "An implementation of the MobileNetV3 models in Pytorch with scripts for training, testing and measuring latency." + } + repositories: { + url: "https://github.com/rsreetech/MultiModalSearch" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "In this repository I demonstrate how you can perform multimodal(image+text) search to find similar images+texts given a test image+text from a multimodal (texts+images) database . I use the Kaggle Shopee dataset. 
I use Tensorflow MobileNet CNN and hugging face sentence transformers BERT to extract image and text embeddings to create a joint embedding search space. Given an image and it text description I extract joint embedding and then use nearest neighbours algorithm to find top 5 similar images+texts description from my joint embedding search space" + } + repositories: { + url: "https://github.com/Video-Streaming-Pipeline/Video-Streaming-Pipeline" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "실시간 이미지 처리 모델을 위한 모바일, 클라우드 영상 전송 파이프라인 개발" + } + repositories: { + url: "https://github.com/SalvadorAlbarran/TFG2020" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Aceleración de AI en dispositivos de bajo consumo" + } + repositories: { + url: "https://github.com/lpirola13/flower-recognizer" + framework: FRAMEWORK_TENSORFLOW + description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." + } + repositories: { + url: "https://github.com/Rishit-dagli/Greenathon-Plant-AI" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 23 + description: "Identify Diseases in Plants☘️ with Machine Learning on the web using TFJS" + } + repositories: { + url: "https://github.com/tensorflow/models/tree/master/research/slim" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70341 + description: "Models and examples built with TensorFlow" + } + repositories: { + url: "https://github.com/jaykshirsagar05/CrowdCounting" + framework: FRAMEWORK_OTHERS + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Random Horizontal Flip" + full_name: "Random Horizontal Flip" + description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "MobileNetV1" + full_name: "MobileNetV1" + description: "**MobileNet** is a type of convolutional neural network designed for mobile and embedded vision applications. They are based on a streamlined architecture that uses depthwise separable convolutions to build lightweight deep neural networks that can have low latency for mobile and embedded devices." + } + methods: { + name: "Depthwise Separable Convolution" + full_name: "Depthwise Separable Convolution" + description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" + } + methods: { + name: "Depthwise Convolution" + full_name: "Depthwise Convolution" + description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. 
We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + } + video: { + video_id: "7UoOFKcyIvM" + video_title: "PR-044: MobileNet" + number_of_likes: 140 + number_of_views: 14777 + published_date: { + seconds: 1509456696 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 45 + value: { + pr_id: 45 + papers: { + paper_id: "deeplab-semantic-image-segmentation-with-deep" + title: "DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs" + arxiv_id: "1606.00915" + abstract: "In this work we address the task of semantic image segmentation with Deep\nLearning and make three main contributions that are experimentally shown to\nhave substantial practical merit. First, we highlight convolution with\nupsampled filters, or 'atrous convolution', as a powerful tool in dense\nprediction tasks. Atrous convolution allows us to explicitly control the\nresolution at which feature responses are computed within Deep Convolutional\nNeural Networks. It also allows us to effectively enlarge the field of view of\nfilters to incorporate larger context without increasing the number of\nparameters or the amount of computation. Second, we propose atrous spatial\npyramid pooling (ASPP) to robustly segment objects at multiple scales. ASPP\nprobes an incoming convolutional feature layer with filters at multiple\nsampling rates and effective fields-of-views, thus capturing objects as well as\nimage context at multiple scales. 
Third, we improve the localization of object\nboundaries by combining methods from DCNNs and probabilistic graphical models.\nThe commonly deployed combination of max-pooling and downsampling in DCNNs\nachieves invariance but has a toll on localization accuracy. We overcome this\nby combining the responses at the final DCNN layer with a fully connected\nConditional Random Field (CRF), which is shown both qualitatively and\nquantitatively to improve localization performance. Our proposed \"DeepLab\"\nsystem sets the new state-of-art at the PASCAL VOC-2012 semantic image\nsegmentation task, reaching 79.7% mIOU in the test set, and advances the\nresults on three other datasets: PASCAL-Context, PASCAL-Person-Part, and\nCityscapes. All of our code is made publicly available online." + pub_date: { + seconds: 1464825600 + } + authors: "Liang-Chieh Chen" + authors: "George Papandreou" + authors: "Iasonas Kokkinos" + authors: "Kevin Murphy" + authors: "Alan L. Yuille" + repositories: { + url: "https://github.com/johnnylu305/Simple-does-it-weakly-supervised-instance-and-semantic-segmentation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 43 + description: "Weakly Supervised Segmentation by Tensorflow. Implements semantic segmentation in Simple Does It: Weakly Supervised Instance and Semantic Segmentation, by Khoreva et al. (CVPR 2017)." + } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1/configs/deeplabv3" + framework: FRAMEWORK_OTHERS + number_of_stars: 1482 + description: "End-to-end image segmentation kit based on PaddlePaddle. " + } + repositories: { + url: "https://github.com/tensorflow/models/tree/master/research/deeplab" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70341 + description: "Models and examples built with TensorFlow" + } + repositories: { + url: "https://github.com/NASA-NeMO-Net/NeMO-Net" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/CompVis/taming-transformers" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1442 + description: "Taming Transformers for High-Resolution Image Synthesis" + } + repositories: { + url: "https://github.com/leimao/DeepLab-V3" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 78 + description: "Google DeepLab V3 for Image Semantic Segmentation" + } + repositories: { + url: "https://github.com/kuangbixia/DeepLab" + framework: FRAMEWORK_PYTORCH + description: "Backup the source codes I learned and modified." 
+ } + repositories: { + url: "https://github.com/Media-Smart/vedaseg" + framework: FRAMEWORK_PYTORCH + number_of_stars: 382 + description: "A semantic segmentation toolbox based on PyTorch" + } + repositories: { + url: "https://github.com/Popcorn-sugar/Deep_v2" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/Qengineering/TensorFlow_Lite_Segmentation_RPi_32-bit" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "TensorFlow Lite segmentation on Raspberry Pi 4 aka Unet at 4.2 FPS" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Residual Block" + full_name: "Residual Block" + description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Spatial Pyramid Pooling" + full_name: "Spatial Pyramid Pooling" + description: "** Spatial Pyramid Pooling (SPP)** is a pooling layer that removes the fixed-size constraint of the network, i.e. a CNN does not require a fixed-size input image. Specifically, we add an SPP layer on top of the last convolutional layer. The SPP layer pools the features and generates fixed-length outputs, which are then fed into the fully-connected layers (or other classifiers). In other words, we perform some information aggregation at a deeper stage of the network hierarchy (between convolutional layers and fully-connected layers) to avoid the need for cropping or warping at the beginning." + } + methods: { + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. 
Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + } + video: { + video_id: "JiC78rUF4iI" + video_title: "PR-045: DeepLab: Semantic Image Segmentation" + number_of_likes: 93 + number_of_views: 8715 + published_date: { + seconds: 1509896571 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 46 + value: { + pr_id: 46 + papers: { + paper_id: "deep-knowledge-tracing" + title: "Deep Knowledge Tracing" + arxiv_id: "1506.05908" + abstract: "Knowledge tracing---where a machine models the knowledge of a student as they\ninteract with coursework---is a well established problem in computer supported\neducation. Though effectively modeling student knowledge would have high\neducational impact, the task has many inherent challenges. In this paper we\nexplore the utility of using Recurrent Neural Networks (RNNs) to model student\nlearning. The RNN family of models have important advantages over previous\nmethods in that they do not require the explicit encoding of human domain\nknowledge, and can capture more complex representations of student knowledge.\nUsing neural networks results in substantial improvements in prediction\nperformance on a range of knowledge tracing datasets. Moreover the learned\nmodel can be used for intelligent curriculum design and allows straightforward\ninterpretation and discovery of structure in student tasks. These results\nsuggest a promising new line of research for knowledge tracing and an exemplary\napplication task for RNNs." 
+ pub_date: { + seconds: 1434672000 + } + authors: "Chris Piech" + authors: "Jonathan Spencer" + authors: "Jonathan Huang" + authors: "Surya Ganguli" + authors: "Mehran Sahami" + authors: "Leonidas Guibas" + authors: "Jascha Sohl-Dickstein" + repositories: { + url: "https://github.com/YangZhouEdu/DKT_pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "Deep Knowledge Tracing by Pytorch" + } + repositories: { + url: "https://github.com/jdxyw/deepKT" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "A repo for knowledge tracing implementation by PyTorch" + } + repositories: { + is_official: true + url: "https://github.com/chrispiech/DeepKnowledgeTracing" + framework: FRAMEWORK_OTHERS + number_of_stars: 183 + description: "source code for the paper Deep Knowledge Tracing" + } + repositories: { + url: "https://github.com/jarviszhb/KnowledgeTracing" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + description: "Some implementations of knowledge tracing with pytorch" + } + methods: { + name: "LINE" + full_name: "Large-scale Information Network Embedding" + description: "LINE is a novel network embedding method which is suitable for arbitrary types of information networks: undirected, directed, and/or weighted. The method optimizes a carefully designed objective function that preserves both the local and global network structures.\r\n\r\nSource: [Tang et al.](https://arxiv.org/pdf/1503.03578v1.pdf)\r\n\r\nImage source: [Tang et al.](https://arxiv.org/pdf/1503.03578v1.pdf)" + } + } + video: { + video_id: "8hdY6Jns5-k" + video_title: "PR-046: Deep Knowledge Tracing" + number_of_views: 2017 + published_date: { + seconds: 1509893052 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 47 + value: { + pr_id: 47 + papers: { + paper_id: "learning-deep-features-for-discriminative" + title: "Learning Deep Features for Discriminative Localization" + arxiv_id: "1512.04150" + abstract: "In this work, we revisit the global average pooling layer proposed in [13],\nand shed light on how it explicitly enables the convolutional neural network to\nhave remarkable localization ability despite being trained on image-level\nlabels. While this technique was previously proposed as a means for\nregularizing training, we find that it actually builds a generic localizable\ndeep representation that can be applied to a variety of tasks. Despite the\napparent simplicity of global average pooling, we are able to achieve 37.1%\ntop-5 error for object localization on ILSVRC 2014, which is remarkably close\nto the 34.2% top-5 error achieved by a fully supervised CNN approach. 
We\ndemonstrate that our network is able to localize the discriminative image\nregions on a variety of tasks despite not being trained for them" + pub_date: { + seconds: 1450051200 + } + authors: "Bolei Zhou" + authors: "Aditya Khosla" + authors: "Agata Lapedriza" + authors: "Aude Oliva" + authors: "Antonio Torralba" + repositories: { + url: "https://github.com/zhoubolei/CAM" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1499 + description: "Class Activation Mapping" + } + repositories: { + url: "https://github.com/FrancescoSaverioZuppichini/A-journey-into-Convolutional-Neural-Network-visualization-" + framework: FRAMEWORK_PYTORCH + number_of_stars: 185 + description: "A journey into Convolutional Neural Network visualization " + } + repositories: { + url: "https://github.com/frgfm/torch-cam" + framework: FRAMEWORK_PYTORCH + number_of_stars: 338 + description: "Class activation maps for your PyTorch models (CAM, Grad-CAM, Grad-CAM++, Smooth Grad-CAM++, Score-CAM, SS-CAM, IS-CAM, XGrad-CAM)" + } + repositories: { + url: "https://github.com/HRanWang/Spatial-Re-Scaling" + framework: FRAMEWORK_PYTORCH + number_of_stars: 129 + } + repositories: { + url: "https://github.com/vlue-c/PyTorch-Explanations" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/jsr66/Machine-Learning-Phases-of-Matter-with-Discriminative-Localization-" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/Seb-Good/deep_ecg" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 53 + description: "A library for classifying single-lead ECG waveforms as either Normal Sinus Rhythm, Atrial Fibrillation, or Other Rhythm." + } + repositories: { + url: "https://github.com/Tetsuya-Nishikawa/CAM" + framework: FRAMEWORK_TENSORFLOW + description: "CAM(class activation map)の実験(https://arxiv.org/pdf/1512.04150.pdf)" + } + repositories: { + url: "https://github.com/FelixFu520/CAM-Cifar10" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/metalbubble/CAM" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1499 + description: "Class Activation Mapping" + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. 
\r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + } + video: { + video_id: "-Z1NIzLxgRU" + video_title: "PR047: Learning Deep Features for Discriminative Localization" + number_of_likes: 31 + number_of_views: 2217 + published_date: { + seconds: 1510500873 + } + uploader: "이광희" + } + } +} +pr_id_to_video: { + key: 48 + value: { + pr_id: 48 + papers: { + paper_id: "towards-principled-methods-for-training" + title: "Towards Principled Methods for Training Generative Adversarial Networks" + arxiv_id: "1701.04862" + abstract: "The goal of this paper is not to introduce a single algorithm or method, but\nto make theoretical steps towards fully understanding the training dynamics of\ngenerative adversarial networks. In order to substantiate our theoretical\nanalysis, we perform targeted experiments to verify our assumptions, illustrate\nour claims, and quantify the phenomena. This paper is divided into three\nsections. The first section introduces the problem at hand. The second section\nis dedicated to studying and proving rigorously the problems including\ninstability and saturation that arize when training generative adversarial\nnetworks. The third section examines a practical and theoretically grounded\ndirection towards solving these problems, while introducing new tools to study\nthem." + pub_date: { + seconds: 1484611200 + } + authors: "Martin Arjovsky" + authors: "Léon Bottou" + repositories: { + url: "https://github.com/voqtuyen/GAN-Intuition" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + } + video: { + video_id: "RlAgB0Ooxaw" + video_title: "PR-048: Towards Principled Methods for Training Generative Adversarial Networks" + number_of_likes: 19 + number_of_views: 1704 + published_date: { + seconds: 1510652207 + } + uploader: "Ji-Hoon Kim" + } + } +} +pr_id_to_video: { + key: 49 + value: { + pr_id: 49 + papers: { + paper_id: "attention-is-all-you-need" + title: "Attention Is All You Need" + arxiv_id: "1706.03762" + abstract: "The dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks in an encoder-decoder configuration. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer, based\nsolely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to be\nsuperior in quality while being more parallelizable and requiring significantly\nless time to train. Our model achieves 28.4 BLEU on the WMT 2014\nEnglish-to-German translation task, improving over the existing best results,\nincluding ensembles by over 2 BLEU. On the WMT 2014 English-to-French\ntranslation task, our model establishes a new single-model state-of-the-art\nBLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction\nof the training costs of the best models from the literature. 
We show that the\nTransformer generalizes well to other tasks by applying it successfully to\nEnglish constituency parsing both with large and limited training data." + pub_date: { + seconds: 1497225600 + } + authors: "Ashish Vaswani" + authors: "Noam Shazeer" + authors: "Niki Parmar" + authors: "Jakob Uszkoreit" + authors: "Llion Jones" + authors: "Aidan N. Gomez" + authors: "Lukasz Kaiser" + authors: "Illia Polosukhin" + repositories: { + url: "https://github.com/bangoc123/transformer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 + description: "Build English-Vietnamese machine translation with ProtonX Transformer. :D" + } + repositories: { + url: "https://github.com/brainsqueeze/text2vec" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 + description: "Contextual embedding for text blobs." + } + repositories: { + url: "https://github.com/maroxtn/tun-sentiment" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "My solution in Zindi Tunisian Sentiment Analysis competition. Ranked #1st." + } + repositories: { + url: "https://github.com/han-shi/SparseBERT" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + } + repositories: { + url: "https://github.com/rupakdas18/SuperGlue-tasks-using-BERT" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "In this project we have implemented 2 SuperGlue tasks (RTE and BOOLQ)." + } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/machine_translation/transformer" + framework: FRAMEWORK_OTHERS + number_of_stars: 1379 + description: "An NLP library with Awesome pre-trained Transformer models and easy-to-use interface, supporting wide-range of NLP tasks from research to industrial applications." + } + repositories: { + url: "https://github.com/mitran27/Attention-is-all-you-Need" + framework: FRAMEWORK_OTHERS + description: "building the Transformer (new world of NLP) completely from scratch" + } + repositories: { + url: "https://github.com/xmu-xiaoma666/External-Attention-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 495 + description: "Pytorch implementation of various Attention Mechanism" + } + repositories: { + url: "https://github.com/stevinc/Transformer_Timeseries" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "Pytorch code for Google's Temporal Fusion Transformer" + } + repositories: { + url: "https://github.com/xydaytoy/BMI-NMT" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." 
+ } + methods: { + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Label Smoothing" + full_name: "Label Smoothing" + description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" + } + methods: { + name: "Transformer" + full_name: "Transformer" + description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. 
The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." + } + methods: { + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. 
\r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + } + video: { + video_id: "6zGgVIlStXs" + video_title: "PR-049: Attention is All You Need" + number_of_views: 7234 + published_date: { + seconds: 1512304902 + } + uploader: "Suh Kiho" + } + } +} +pr_id_to_video: { + key: 50 + value: { + pr_id: 50 + papers: { + paper_id: "convolutional-lstm-network-a-machine-learning" + title: "Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting" + arxiv_id: "1506.04214" + abstract: "The goal of precipitation nowcasting is to predict the future rainfall\nintensity in a local region over a relatively short period of time. Very few\nprevious studies have examined this crucial and challenging weather forecasting\nproblem from the machine learning perspective. In this paper, we formulate\nprecipitation nowcasting as a spatiotemporal sequence forecasting problem in\nwhich both the input and the prediction target are spatiotemporal sequences. By\nextending the fully connected LSTM (FC-LSTM) to have convolutional structures\nin both the input-to-state and state-to-state transitions, we propose the\nconvolutional LSTM (ConvLSTM) and use it to build an end-to-end trainable model\nfor the precipitation nowcasting problem. Experiments show that our ConvLSTM\nnetwork captures spatiotemporal correlations better and consistently\noutperforms FC-LSTM and the state-of-the-art operational ROVER algorithm for\nprecipitation nowcasting." 
+ pub_date: { + seconds: 1434153600 + } + authors: "Xingjian Shi" + authors: "Zhourong Chen" + authors: "Hao Wang" + authors: "Dit-Yan Yeung" + authors: "Wai-kin Wong" + authors: "Wang-chun Woo" + repositories: { + url: "https://github.com/georgeyiasemis/2D-Convolutional-Recurrent-Neural-Networks-with-PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "2D Convolutional Recurrent Neural Networks implemented in PyTorch" + } + repositories: { + url: "https://github.com/czifan/ConvLSTM.pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 25 + } + repositories: { + url: "https://github.com/Tetsuya-Nishikawa/ConvLSTM_DEMO" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "自作ConvLSTMデモ" + } + repositories: { + url: "https://github.com/rogertrullo/pytorch_convlstm" + framework: FRAMEWORK_PYTORCH + number_of_stars: 126 + description: "convolutional lstm implementation in pytorch" + } + repositories: { + url: "https://github.com/trichtu/ConvLSTM-RAU-net" + framework: FRAMEWORK_PYTORCH + number_of_stars: 18 + description: "Spatial-temperal Prediction Model based on history observation and WRF numerical prediction " + } + repositories: { + url: "https://github.com/ndrplz/ConvLSTM_pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 831 + description: "Implementation of Convolutional LSTM in PyTorch." + } + repositories: { + url: "https://github.com/automan000/Convolution_LSTM_pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 675 + description: "Multi-layer convolutional LSTM with Pytorch" + } + methods: { + name: "Tanh Activation" + full_name: "Tanh Activation" + description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "LSTM" + full_name: "Long Short-Term Memory" + description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. 
Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + methods: { + name: "ConvLSTM" + full_name: "ConvLSTM" + description: "**ConvLSTM** is a type of recurrent neural network for spatio-temporal prediction that has convolutional structures in both the input-to-state and state-to-state transitions. The ConvLSTM determines the future state of a certain cell in the grid by the inputs and past states of its local neighbors. This can easily be achieved by using a convolution operator in the state-to-state and input-to-state transitions (see Figure). The key equations of ConvLSTM are shown below, where $∗$ denotes the convolution operator and $\\odot$ the Hadamard product:\r\n\r\n$$ i\\_{t} = \\sigma\\left(W\\_{xi} ∗ X\\_{t} + W\\_{hi} ∗ H\\_{t−1} + W\\_{ci} \\odot \\mathcal{C}\\_{t−1} + b\\_{i}\\right) $$\r\n\r\n$$ f\\_{t} = \\sigma\\left(W\\_{xf} ∗ X\\_{t} + W\\_{hf} ∗ H\\_{t−1} + W\\_{cf} \\odot \\mathcal{C}\\_{t−1} + b\\_{f}\\right) $$\r\n\r\n$$ \\mathcal{C}\\_{t} = f\\_{t} \\odot \\mathcal{C}\\_{t−1} + i\\_{t} \\odot \\text{tanh}\\left(W\\_{xc} ∗ X\\_{t} + W\\_{hc} ∗ \\mathcal{H}\\_{t−1} + b\\_{c}\\right) $$\r\n\r\n$$ o\\_{t} = \\sigma\\left(W\\_{xo} ∗ X\\_{t} + W\\_{ho} ∗ \\mathcal{H}\\_{t−1} + W\\_{co} \\odot \\mathcal{C}\\_{t} + b\\_{o}\\right) $$\r\n\r\n$$ \\mathcal{H}\\_{t} = o\\_{t} \\odot \\text{tanh}\\left(C\\_{t}\\right) $$\r\n\r\nIf we view the states as the hidden representations of moving objects, a ConvLSTM with a larger transitional kernel should be able to capture faster motions while one with a smaller kernel can capture slower motions. \r\n\r\nTo ensure that the states have the same number of rows and same number of columns as the inputs, padding is needed before applying the convolution operation. Here, padding of the hidden states on the boundary points can be viewed as using the state of the outside world for calculation. Usually, before the first input comes, we initialize all the states of the LSTM to zero which corresponds to \"total ignorance\" of the future." + } + } + video: { + video_id: "3cFfCM4CXws" + video_title: "PR-050: Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting" + number_of_likes: 38 + number_of_views: 6949 + published_date: { + seconds: 1511707163 + } + uploader: "Taegyun Jeon" + } + } +} +pr_id_to_video: { + key: 51 + value: { + pr_id: 51 + papers: { + paper_id: "conditional-generative-adversarial-nets" + title: "Conditional Generative Adversarial Nets" + arxiv_id: "1411.1784" + abstract: "Generative Adversarial Nets [8] were recently introduced as a novel way to train generative models. 
In this work we introduce the conditional version of generative adversarial nets, which can be constructed by simply feeding the data, y, we wish to condition on to both the generator and discriminator. We show that this model can generate MNIST digits conditioned on class labels. We also illustrate how this model could be used to learn a multi-modal model, and provide preliminary examples of an application to image tagging in which we demonstrate how this approach can generate descriptive tags which are not part of training labels." + pub_date: { + seconds: 1415232000 + } + authors: "Mehdi Mirza" + authors: "Simon Osindero" + repositories: { + url: "https://github.com/asiltureli/gan-in-colab" + framework: FRAMEWORK_PYTORCH + description: "GAN implementations on Google Colab" + } + repositories: { + url: "https://github.com/AshishSingh2261/GAN" + framework: FRAMEWORK_OTHERS + description: "Contains code for different types of GANs trained on different datasets." + } + repositories: { + url: "https://github.com/YigitGunduc/Conditional-GANs-CGANs" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Conditional Generative Adversarial Networks(cgans) to convert text to image implemented in Python and TensorFlow & Keras" + } + repositories: { + url: "https://github.com/kynk94/TF2-Image-Generation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "Tensorflow2 reimplementation of image generation model (GAN, Style Transfer, Image to Image Translation, etc)" + } + repositories: { + url: "https://github.com/otepencelik/GAN-Artwork-Generation" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + } + repositories: { + url: "https://github.com/MCLYang/RhythmGAN_pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "The pytorch implementation for RhythmGAN" + } + repositories: { + url: "https://github.com/Lornatang/CGAN-PyTorch" + framework: FRAMEWORK_PYTORCH + description: "Simple implementation of conditional general adverse nets in pytorch machine learning framework" + } + repositories: { + url: "https://github.com/jamesloyys/PyTorch-Lightning-GAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 10 + description: "Implementations of various GAN architectures using PyTorch Lightning" + } + repositories: { + url: "https://github.com/gordicaleksa/pytorch-gans" + framework: FRAMEWORK_PYTORCH + number_of_stars: 288 + description: "My implementation of various GAN (generative adversarial networks) architectures like vanilla GAN (Goodfellow et al.), cGAN (Mirza et al.), DCGAN (Radford et al.), etc." + } + repositories: { + url: "https://github.com/alles9fresser/Conditional-GAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7 + } + } + video: { + video_id: "iCgT8G4PkqI" + video_title: "PR-051: Conditional Generative Adversarial Nets" + number_of_likes: 24 + number_of_views: 3429 + published_date: { + seconds: 1512310569 + } + uploader: "Seungil Kim" + } + } +} +pr_id_to_video: { + key: 52 + value: { + pr_id: 52 + papers: { + paper_id: "multiplayer-alphazero" + title: "Multiplayer AlphaZero" + arxiv_id: "1910.13012" + abstract: "The AlphaZero algorithm has achieved superhuman performance in two-player, deterministic, zero-sum games where perfect information of the game state is available. This success has been demonstrated in Chess, Shogi, and Go where learning occurs solely through self-play. 
Many real-world applications (e.g., equity trading) require the consideration of a multiplayer environment. In this work, we suggest novel modifications of the AlphaZero algorithm to support multiplayer environments, and evaluate the approach in two simple 3-player games. Our experiments show that multiplayer AlphaZero learns successfully and consistently outperforms a competing approach: Monte Carlo tree search. These results suggest that our modified AlphaZero can learn effective strategies in multiplayer game scenarios. Our work supports the use of AlphaZero in multiplayer games and suggests future research for more complex environments." + pub_date: { + seconds: 1572307200 + } + authors: "Nick Petosa" + authors: "Tucker Balch" + repositories: { + is_official: true + url: "https://github.com/petosa/multiplayer-alphazero" + framework: FRAMEWORK_PYTORCH + number_of_stars: 18 + description: "PyTorch AlphaZero implementation with multiplayer support [NeurIPS 2019 Deep Reinforcement Learning Workshop]" + } + methods: { + name: "AlphaZero" + full_name: "AlphaZero" + description: "**AlphaZero** is a reinforcement learning agent for playing board games such as Go, chess, and shogi. " + } + } + video: {} + } +} +pr_id_to_video: { + key: 53 + value: { + pr_id: 53 + papers: { + paper_id: "grad-cam-visual-explanations-from-deep" + title: "Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization" + arxiv_id: "1610.02391" + abstract: "We propose a technique for producing \"visual explanations\" for decisions from a large class of CNN-based models, making them more transparent. Our approach - Gradient-weighted Class Activation Mapping (Grad-CAM), uses the gradients of any target concept, flowing into the final convolutional layer to produce a coarse localization map highlighting important regions in the image for predicting the concept. Grad-CAM is applicable to a wide variety of CNN model-families: (1) CNNs with fully-connected layers, (2) CNNs used for structured outputs, (3) CNNs used in tasks with multimodal inputs or reinforcement learning, without any architectural changes or re-training. We combine Grad-CAM with fine-grained visualizations to create a high-resolution class-discriminative visualization and apply it to off-the-shelf image classification, captioning, and visual question answering (VQA) models, including ResNet-based architectures. In the context of image classification models, our visualizations (a) lend insights into their failure modes, (b) are robust to adversarial images, (c) outperform previous methods on localization, (d) are more faithful to the underlying model and (e) help achieve generalization by identifying dataset bias. For captioning and VQA, we show that even non-attention based models can localize inputs. We devise a way to identify important neurons through Grad-CAM and combine it with neuron names to provide textual explanations for model decisions. Finally, we design and conduct human studies to measure if Grad-CAM helps users establish appropriate trust in predictions from models and show that Grad-CAM helps untrained users successfully discern a 'stronger' model from a 'weaker' one even when both make identical predictions. Our code is available at https://github.com/ramprs/grad-cam/, along with a demo at http://gradcam.cloudcv.org, and a video at youtu.be/COjUB9Izk6E." + pub_date: { + seconds: 1475798400 + } + authors: "Ramprasaath R. 
Selvaraju" + authors: "Michael Cogswell" + authors: "Abhishek Das" + authors: "Ramakrishna Vedantam" + authors: "Devi Parikh" + authors: "Dhruv Batra" + repositories: { + url: "https://github.com/CMU-CREATE-Lab/deep-smoke-machine" + framework: FRAMEWORK_PYTORCH + number_of_stars: 62 + description: "Deep learning models and dataset for recognizing industrial smoke emissions" + } + repositories: { + url: "https://github.com/novice03/timm-vis" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Visualizer for PyTorch image models" + } + repositories: { + url: "https://github.com/sauravmishra1710/EXPLAINABLE-AI---Skin-Cancer-Detection-explained-with-GRADCAM" + framework: FRAMEWORK_TENSORFLOW + description: "Diagnose the presence of skin cancer in a person using a CNN and also explain what led the CNN to arrive at the decision. Visual explanations are made utilizing Gradient-weighted Class Activation Mapping (Grad-CAM), using the gradients flowing into the final convolutional layer to produce a coarse localization map highlighting the important regions in the image considered for arriving at the decision. The original paper for GRADCAM can be found @ https://arxiv.org/abs/1610.02391" + } + repositories: { + url: "https://github.com/xn1997/pytorch-grad-cam" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Feature map visualization (personal modified version)" + } + repositories: { + url: "https://github.com/priyavrat-misra/xrays-and-gradcam" + framework: FRAMEWORK_PYTORCH + number_of_stars: 14 + description: "Classification and Gradient-based Localization of Chest Radiographs using PyTorch." + } + repositories: { + url: "https://github.com/jordan-bird/synthetic-fruit-image-generator" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Use a CGAN to generate synthetic images of healthy and unhealthy lemons" + } + repositories: { + url: "https://github.com/FrancescoSaverioZuppichini/A-journey-into-Convolutional-Neural-Network-visualization-" + framework: FRAMEWORK_PYTORCH + number_of_stars: 185 + description: "A journey into Convolutional Neural Network visualization " + } + repositories: { + url: "https://github.com/samson6460/tf_keras_gradcamplusplus" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 10 + description: "tensorflow.keras implementation of gradcam and gradcam++" + } + repositories: { + url: "https://github.com/dtanoglidis/DeepShadows" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 10 + description: "Repository for the project \"DeepShadows: Separating LSBGs from artifacts using Deep Learning\"" + } + repositories: { + url: "https://github.com/ayulockin/interpretabilitycnn" + framework: FRAMEWORK_OTHERS + number_of_stars: 7 + description: "Custom Keras Callbacks for Feature Visualization, Class Activation Map, Grad-CAM" + } + } + video: { + video_id: "faGsrPX1yFM" + video_title: "PR-053: Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization" + number_of_likes: 34 + number_of_views: 6464 + published_date: { + seconds: 1512915707 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 54 + value: { + pr_id: 54 + papers: { + paper_id: "shufflenet-an-extremely-efficient" + title: "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" + arxiv_id: "1707.01083" + abstract: "We introduce an extremely computation-efficient CNN architecture named\nShuffleNet, which is designed specially for mobile devices with very 
limited\ncomputing power (e.g., 10-150 MFLOPs). The new architecture utilizes two new\noperations, pointwise group convolution and channel shuffle, to greatly reduce\ncomputation cost while maintaining accuracy. Experiments on ImageNet\nclassification and MS COCO object detection demonstrate the superior\nperformance of ShuffleNet over other structures, e.g. lower top-1 error\n(absolute 7.8%) than recent MobileNet on ImageNet classification task, under\nthe computation budget of 40 MFLOPs. On an ARM-based mobile device, ShuffleNet\nachieves ~13x actual speedup over AlexNet while maintaining comparable\naccuracy." + pub_date: { + seconds: 1499126400 + } + authors: "Xiangyu Zhang" + authors: "Xinyu Zhou" + authors: "Mengxiao Lin" + authors: "Jian Sun" + repositories: { + url: "https://github.com/tensorpack/tensorpack/tree/master/examples/ImageNetModels" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6036 + description: "A Neural Net Training Interface on TensorFlow, with focus on speed + flexibility" + } + repositories: { + url: "https://github.com/afzalahmad0203/Tensorflow-Shufflenet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Shufflenet implementation in tensorflow based on https://arxiv.org/abs/1707.01083" + } + repositories: { + url: "https://github.com/osmr/imgclsmob" + framework: FRAMEWORK_OTHERS + number_of_stars: 2202 + description: "Sandbox for training deep learning networks" + } + repositories: { + url: "https://github.com/eogussla12/Shufflenet_CIFAR10_Pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Pytorch-Shufflenet-CIFAR10" + } + repositories: { + url: "https://github.com/MrRen-sdhm/Embedded_Multi_Object_Detection_CNN" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 + description: "Multi-object detection by lightweight CNN on embedded system" + } + repositories: { + url: "https://github.com/alalagong/LEDNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "Small changed LEDNet" + } + repositories: { + url: "https://github.com/clavichord93/MENet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 11 + description: "This repo contains code for *Merging and Evolution: Improving Convolutional Neural Networks for Mobile Applications*." + } + repositories: { + url: "https://github.com/europa1610/Tensorflow-Shufflenet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Shufflenet implementation in tensorflow based on https://arxiv.org/abs/1707.01083" + } + repositories: { + url: "https://github.com/afzalahmad0203/Numpy-Shufflenet" + framework: FRAMEWORK_OTHERS + description: "Numpy implementation of shufflenet based on https://arxiv.org/abs/1707.01083" + } + repositories: { + url: "https://github.com/minhto2802/keras-shufflenet" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. 
It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "RPN" + full_name: "Region Proposal Network" + description: "A **Region Proposal Network**, or **RPN**, is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals. RPN and algorithms like Fast R-CNN can be merged into a single network by sharing their convolutional features - using the recently popular terminology of neural networks with attention mechanisms, the RPN component tells the unified network where to look.\r\n\r\nRPNs are designed to efficiently predict region proposals with a wide range of scales and aspect ratios. RPNs use anchor boxes that serve as references at multiple scales and aspect ratios. The scheme can be thought of as a pyramid of regression references, which avoids enumerating images or filters of multiple scales or aspect ratios." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "AlexNet" + full_name: "AlexNet" + description: "**AlexNet** is a classic convolutional neural network architecture. It consists of convolutions, max pooling and dense layers as the basic building blocks. Grouped convolutions are used in order to fit the model across two GPUs." + } + methods: { + name: "Random Horizontal Flip" + full_name: "Random Horizontal Flip" + description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. 
Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "RoIPool" + full_name: "RoIPool" + description: "**Region of Interest Pooling**, or **RoIPool**, is an operation for extracting a small feature map (e.g., $7×7$) from each RoI in detection and segmentation based tasks. Features are extracted from each candidate box, and thereafter in models like Fast R-CNN, are then classified and bounding box regression performed.\r\n\r\nThe actual scaling to, e.g., $7×7$, occurs by dividing the region proposal into equally sized sections, finding the largest value in each section, and then copying these max values to the output buffer. In essence, **RoIPool** is max pooling on a discrete grid based on a box.\r\n\r\nImage Source: [Joyce Xu](https://towardsdatascience.com/deep-learning-for-object-detection-a-comprehensive-review-73930816d8d9)" + } + methods: { + name: "Channel Shuffle" + full_name: "Channel Shuffle" + description: "**Channel Shuffle** is an operation to help information flow across feature channels in convolutional neural networks. It was used as part of the [ShuffleNet](https://paperswithcode.com/method/shufflenet) architecture. \r\n\r\nIf we allow a group convolution to obtain input data from different groups, the input and output channels will be fully related. Specifically, for the feature map generated from the previous group layer, we can first divide the channels in each group into several subgroups, then feed each group in the next layer with different subgroups. \r\n\r\nThe above can be efficiently and elegantly implemented by a channel shuffle operation: suppose a convolutional layer with $g$ groups whose output has $g \\times n$ channels; we first reshape the output channel dimension into $\\left(g, n\\right)$, transposing and then flattening it back as the input of next layer. Channel shuffle is also differentiable, which means it can be embedded into network structures for end-to-end training." + } + methods: { + name: "Faster R-CNN" + full_name: "Faster R-CNN" + description: "**Faster R-CNN** is an object detection model that improves on [Fast R-CNN](https://paperswithcode.com/method/fast-r-cnn) by utilising a region proposal network ([RPN](https://paperswithcode.com/method/rpn)) with the CNN model. The RPN shares full-image convolutional features with the detection network, enabling nearly cost-free region proposals. It is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals, which are used by [Fast R-CNN](https://paperswithcode.com/method/fast-r-cnn) for detection. RPN and Fast R-CNN are merged into a single network by sharing their convolutional features: the RPN component tells the unified network where to look.\r\n\r\nAs a whole, Faster R-CNN consists of two modules. The first module is a deep fully convolutional network that proposes regions, and the second module is the Fast R-CNN detector that uses the proposed regions." + } + methods: { + name: "Depthwise Convolution" + full_name: "Depthwise Convolution" + description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. 
In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + } + video: { + video_id: "pNuBdj53Hbc" + video_title: "PR-054: ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" + number_of_likes: 52 + number_of_views: 6174 + published_date: { + seconds: 1513005030 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 55 + value: { + pr_id: 55 + papers: { + paper_id: "neural-machine-translation-by-jointly" + title: "Neural Machine Translation by Jointly Learning to Align and Translate" + arxiv_id: "1409.0473" + abstract: "Neural machine translation is a recently proposed approach to machine\ntranslation. Unlike the traditional statistical machine translation, the neural\nmachine translation aims at building a single neural network that can be\njointly tuned to maximize the translation performance. The models proposed\nrecently for neural machine translation often belong to a family of\nencoder-decoders and consists of an encoder that encodes a source sentence into\na fixed-length vector from which a decoder generates a translation. In this\npaper, we conjecture that the use of a fixed-length vector is a bottleneck in\nimproving the performance of this basic encoder-decoder architecture, and\npropose to extend this by allowing a model to automatically (soft-)search for\nparts of a source sentence that are relevant to predicting a target word,\nwithout having to form these parts as a hard segment explicitly. With this new\napproach, we achieve a translation performance comparable to the existing\nstate-of-the-art phrase-based system on the task of English-to-French\ntranslation. Furthermore, qualitative analysis reveals that the\n(soft-)alignments found by the model agree well with our intuition." + pub_date: { + seconds: 1409529600 + } + authors: "Dzmitry Bahdanau" + authors: "Kyunghyun Cho" + authors: "Yoshua Bengio" + repositories: { + url: "https://github.com/dl4nlp-tuda2021/deep-learning-for-nlp-lectures" + framework: FRAMEWORK_PYTORCH + number_of_stars: 72 + description: "Deep Learning for Natural Language Processing - Lectures 2021" + } + repositories: { + url: "https://github.com/prakhargurawa/Neural-Machine-Translation-Keras-Attention" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Machine translation using LSTM Model. Created two translation models with/without attention mechanisms for translation between French-English and German-English." + } + repositories: { + url: "https://github.com/AMNAALMGLY/NLP" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/prakhargurawa/Neural-Machine-Translation-Keras-German-English" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Machine translation using LSTM Model. Created two translation models with/without attention mechanisms for translation between French-English and German-English." 
+ } + repositories: { + url: "https://github.com/millenialSpirou/ift6010" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/yinghao1019/NLP_and_DL_practice" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "This repository is used for NLP Model practice and learning" + } + repositories: { + url: "https://github.com/tree-park/kor-to-eng-translation" + framework: FRAMEWORK_PYTORCH + description: "Translator by transforemer and seq2seq (with attention mechanism) - Pytorch" + } + repositories: { + url: "https://github.com/hiun/learning-transformers" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Transformers Tutorials with Open Source Implementations" + } + repositories: { + url: "https://github.com/chenqianqianxiaoxiannv/seq2seq" + framework: FRAMEWORK_TENSORFLOW + description: "seq2seq" + } + repositories: { + url: "https://github.com/xhlulu/arxiv-assistant" + framework: FRAMEWORK_OTHERS + description: "A simple webapp for helping you navigate Arxiv.org" + } + methods: { + name: "Tanh Activation" + full_name: "Tanh Activation" + description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" + } + methods: { + name: "Additive Attention" + full_name: "Additive Attention" + description: "**Additive Attention**, also known as **Bahdanau Attention**, uses a one-hidden layer feed-forward network to calculate the attention alignment score:\r\n\r\n$$f_{att}\\left(\\textbf{h}_{i}, \\textbf{s}\\_{j}\\right) = v\\_{a}^{T}\\tanh\\left(\\textbf{W}\\_{a}\\left[\\textbf{h}\\_{i};\\textbf{s}\\_{j}\\right]\\right)$$\r\n\r\nwhere $\\textbf{v}\\_{a}$ and $\\textbf{W}\\_{a}$ are learned attention parameters. Here $\\textbf{h}$ refers to the hidden states for the encoder, and $\\textbf{s}$ is the hidden states for the decoder. The function above is thus a type of alignment score function. We can use a matrix of alignment scores to show the correlation between source and target words, as the Figure to the right shows.\r\n\r\nWithin a neural network, once we have the alignment scores, we calculate the final scores using a softmax function of these alignment scores (ensuring it sums to 1)." + } + } + video: { + video_id: "upskBSbA9cA" + video_title: "PR-055: Neural Machine Translation by Jointly Learning to Align and Translate" + number_of_likes: 27 + number_of_views: 2832 + published_date: { + seconds: 1513516897 + } + uploader: "Jiyang Kang" + } + } +} +pr_id_to_video: { + key: 56 + value: { + pr_id: 56 + papers: { + paper_id: "dynamic-routing-between-capsules" + title: "Dynamic Routing Between Capsules" + arxiv_id: "1710.09829" + abstract: "A capsule is a group of neurons whose activity vector represents the\ninstantiation parameters of a specific type of entity such as an object or an\nobject part. 
We use the length of the activity vector to represent the\nprobability that the entity exists and its orientation to represent the\ninstantiation parameters. Active capsules at one level make predictions, via\ntransformation matrices, for the instantiation parameters of higher-level\ncapsules. When multiple predictions agree, a higher level capsule becomes\nactive. We show that a discriminatively trained, multi-layer capsule system\nachieves state-of-the-art performance on MNIST and is considerably better than\na convolutional net at recognizing highly overlapping digits. To achieve these\nresults we use an iterative routing-by-agreement mechanism: A lower-level\ncapsule prefers to send its output to higher level capsules whose activity\nvectors have a big scalar product with the prediction coming from the\nlower-level capsule." + pub_date: { + seconds: 1508976000 + } + authors: "Sara Sabour" + authors: "Nicholas Frosst" + authors: "Geoffrey E Hinton" + repositories: { + url: "https://github.com/Egesabanci/capsuleNetworks" + framework: FRAMEWORK_TENSORFLOW + description: ":pill: CapsNets implementation according to the paper: Dynamic Routing Between Capsules - Sara Sabour, Nicholas Frosst, Geoffrey E Hinton" + } + repositories: { + url: "https://github.com/ecstayalive/Degenerate-capsule-neural-network" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "A degenerate capsule neural network that changes very little of the capsule network structure and applies it to recognition settings that are not sensitive to position, while preserving its fast generalization property" + } + repositories: { + url: "https://github.com/razvanalex/CapsLayer" + framework: FRAMEWORK_TENSORFLOW + description: "CapsLayer: An advanced library for capsule theory" + } + repositories: { + url: "https://github.com/naturomics/CapsLayer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 352 + description: "CapsLayer: An advanced library for capsule theory" + } + repositories: { + url: "https://github.com/lab-ml/nn/tree/master/labml_nn/capsule_networks" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3069 + description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + } + repositories: { + url: "https://github.com/jelifysh/Capsule-Networks" + framework: FRAMEWORK_PYTORCH + number_of_stars: 12 + description: "Pytorch Implementation of Capsule Networks" + } + repositories: { + url: "https://github.com/EscVM/Efficient-CapsNet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 126 + description: "Official TensorFlow code for the forthcoming paper \"Efficient-CapsNet: Capsule Network with Self-Attention Routing\"." + } + repositories: { + url: "https://github.com/OwenLeng/pytorch-capsule" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/noureldinalaa/Capsule-Networks" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Simply explain and implement Capsule Networks on MNIST dataset using Pytorch." 
+ } + repositories: { + url: "https://github.com/dedhiaparth98/capsule-network" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "A TensorFlow implementation of Capsule Network as described in the paper Dynamic Routing Between Capsules" + } + } + video: { + video_id: "_YT_8CT2w_Q" + video_title: "PR-056: Capsule Network" + number_of_likes: 67 + number_of_views: 5738 + published_date: { + seconds: 1513522378 + } + uploader: "Jaejun Yoo" + } + } +} +pr_id_to_video: { + key: 57 + value: { + pr_id: 57 + papers: { + paper_id: "mask-r-cnn" + title: "Mask R-CNN" + arxiv_id: "1703.06870" + abstract: "We present a conceptually simple, flexible, and general framework for object\ninstance segmentation. Our approach efficiently detects objects in an image\nwhile simultaneously generating a high-quality segmentation mask for each\ninstance. The method, called Mask R-CNN, extends Faster R-CNN by adding a\nbranch for predicting an object mask in parallel with the existing branch for\nbounding box recognition. Mask R-CNN is simple to train and adds only a small\noverhead to Faster R-CNN, running at 5 fps. Moreover, Mask R-CNN is easy to\ngeneralize to other tasks, e.g., allowing us to estimate human poses in the\nsame framework. We show top results in all three tracks of the COCO suite of\nchallenges, including instance segmentation, bounding-box object detection, and\nperson keypoint detection. Without bells and whistles, Mask R-CNN outperforms\nall existing, single-model entries on every task, including the COCO 2016\nchallenge winners. We hope our simple and effective approach will serve as a\nsolid baseline and help ease future research in instance-level recognition.\nCode has been made available at: https://github.com/facebookresearch/Detectron" + pub_date: { + seconds: 1489968000 + } + authors: "Kaiming He" + authors: "Georgia Gkioxari" + authors: "Piotr Dollár" + authors: "Ross Girshick" + repositories: { + url: "https://github.com/SonginCV/GMPHD_MAF" + framework: FRAMEWORK_OTHERS + number_of_stars: 10 + description: "The official implementation of the GMPHD_MAF Tracker" + } + repositories: { + url: "https://github.com/miaohua1982/simple_fasterrcnn_pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/tuguldurs/vivus" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "venous intravascular ultrasound image processing" + } + repositories: { + url: "https://github.com/SonginCV/GMPHD_SAF" + framework: FRAMEWORK_OTHERS + number_of_stars: 10 + description: "The official implementation of the GMPHD_MAF Tracker" + } + repositories: { + url: "https://github.com/alexalm4190/Mask_RCNN-Vizzy_Hand" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/deolipankaj/Stone_Detection_MRCNN" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Stone detection in an off-road environment with Mask R-CNN" + } + repositories: { + url: "https://github.com/EmGarr/kerod" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 34 + description: "DETR - Faster RCNN implementation in tensorflow 2" + } + repositories: { + url: "https://github.com/polospeter/TensorFlow-Advanced-Techniques-Specialization" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + } + repositories: { + is_official: true + url: "https://github.com/facebookresearch/detectron2" + framework: FRAMEWORK_PYTORCH + 
number_of_stars: 16954 + description: "Detectron2 is FAIR's next-generation platform for object detection, segmentation and other visual recognition tasks." + } + repositories: { + url: "https://github.com/raymon-tian/hourglass-facekeypoints-detection" + framework: FRAMEWORK_PYTORCH + number_of_stars: 214 + description: "face keypoints deteciton based on stackedhourglass" + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Bottleneck Residual Block" + full_name: "Bottleneck Residual Block" + description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. 
They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + } + methods: { + name: "RoIAlign" + full_name: "RoIAlign" + description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "Mask R-CNN" + full_name: "Mask R-CNN" + description: "**Mask R-CNN** extends [Faster R-CNN](http://paperswithcode.com/method/faster-r-cnn) to solve instance segmentation tasks. It achieves this by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. In principle, Mask R-CNN is an intuitive extension of Faster R-CNN, but constructing the mask branch properly is critical for good results. \r\n\r\nMost importantly, Faster R-CNN was not designed for pixel-to-pixel alignment between network inputs and outputs. This is evident in how [RoIPool](http://paperswithcode.com/method/roi-pooling), the *de facto* core operation for attending to instances, performs coarse spatial quantization for feature extraction. To fix the misalignment, Mask R-CNN utilises a simple, quantization-free layer, called [RoIAlign](http://paperswithcode.com/method/roi-align), that faithfully preserves exact spatial locations. \r\n\r\nSecondly, Mask R-CNN *decouples* mask and class prediction: it predicts a binary mask for each class independently, without competition among classes, and relies on the network's RoI classification branch to predict the category. In contrast, an [FCN](http://paperswithcode.com/method/fcn) usually perform per-pixel multi-class categorization, which couples segmentation and classification." + } + methods: { + name: "ResNet" + full_name: "Residual Network" + description: "**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. 
Instead of hoping each few stacked layers directly fit a desired underlying mapping, residual nets let these layers fit a residual mapping. They stack [residual blocks](https://paperswithcode.com/method/residual-block) ontop of each other to form network: e.g. a ResNet-50 has fifty layers using these blocks. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}(x)$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}(x):=\\mathcal{H}(x)-x$. The original mapping is recast into $\\mathcal{F}(x)+x$.\r\n\r\nThere is empirical evidence that these types of network are easier to optimize, and can gain accuracy from considerably increased depth." + } + methods: { + name: "ResNeXt Block" + full_name: "ResNeXt Block" + description: "A **ResNeXt Block** is a type of residual block used as part of the [ResNeXt](https://paperswithcode.com/method/resnext) CNN architecture. It uses a \"split-transform-merge\" strategy (branched paths within a single module) similar to an [Inception module](https://paperswithcode.com/method/inception-module), i.e. it aggregates a set of transformations. Compared to a Residual Block, it exposes a new dimension, *cardinality* (size of set of transformations) $C$, as an essential factor in addition to depth and width. \r\n\r\nFormally, a set of aggregated transformations can be represented as: $\\mathcal{F}(x)=\\sum_{i=1}^{C}\\mathcal{T}_i(x)$, where $\\mathcal{T}_i(x)$ can be an arbitrary function. Analogous to a simple neuron, $\\mathcal{T}_i$ should project $x$ into an (optionally low-dimensional) embedding and then transform it." + } + } + video: { + video_id: "RtSZALC9DlU" + video_title: "PR-057: Mask R-CNN" + number_of_likes: 133 + number_of_views: 10992 + published_date: { + seconds: 1515330928 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 58 + value: { + pr_id: 58 + papers: { + paper_id: "the-consciousness-prior" + title: "The Consciousness Prior" + arxiv_id: "1709.08568" + abstract: "A new prior is proposed for learning representations of high-level concepts of the kind we manipulate with language. This prior can be combined with other priors in order to help disentangling abstract factors from each other. It is inspired by cognitive neuroscience theories of consciousness, seen as a bottleneck through which just a few elements, after having been selected by attention from a broader pool, are then broadcast and condition further processing, both in perception and decision-making. The set of recently selected elements one becomes aware of is seen as forming a low-dimensional conscious state. This conscious state is combining the few concepts constituting a conscious thought, i.e., what one is immediately conscious of at a particular moment. We claim that this architectural and information-processing constraint corresponds to assumptions about the joint distribution between high-level concepts. To the extent that these assumptions are generally true (and the form of natural language seems consistent with them), they can form a useful prior for representation learning. A low-dimensional thought or conscious state is analogous to a sentence: it involves only a few variables and yet can make a statement with very high probability of being true. 
This is consistent with a joint distribution (over high-level concepts) which has the form of a sparse factor graph, i.e., where the dependencies captured by each factor of the factor graph involve only very few variables while creating a strong dip in the overall energy function. The consciousness prior also makes it natural to map conscious states to natural language utterances or to express classical AI knowledge in a form similar to facts and rules, albeit capturing uncertainty as well as efficient search mechanisms implemented by attention mechanisms." + pub_date: { + seconds: 1506297600 + } + authors: "Yoshua Bengio" + } + video: { + video_id: "7fIAdhl0KYc" + video_title: "PR-058: The Consciousness Prior" + number_of_views: 1151 + published_date: { + seconds: 1515333966 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 59 + value: { + pr_id: 59 + papers: { + paper_id: "style-transfer-from-non-parallel-text-by" + title: "Style Transfer from Non-Parallel Text by Cross-Alignment" + arxiv_id: "1705.09655" + abstract: "This paper focuses on style transfer on the basis of non-parallel text. This\nis an instance of a broad family of problems including machine translation,\ndecipherment, and sentiment modification. The key challenge is to separate the\ncontent from other aspects such as style. We assume a shared latent content\ndistribution across different text corpora, and propose a method that leverages\nrefined alignment of latent representations to perform style transfer. The\ntransferred sentences from one style should match example sentences from the\nother style as a population. We demonstrate the effectiveness of this\ncross-alignment method on three tasks: sentiment modification, decipherment of\nword substitution ciphers, and recovery of word order." + pub_date: { + seconds: 1495756800 + } + authors: "Tianxiao Shen" + authors: "Tao Lei" + authors: "Regina Barzilay" + authors: "Tommi Jaakkola" + repositories: { + url: "https://github.com/jpark621/language-style-transfer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 16 + description: "Reimplementation of NLP Style Transfer from Non-parallel Text with Adversarial Alignment (https://arxiv.org/abs/1705.09655)" + } + repositories: { + url: "https://github.com/jishavm/TextStyleTransfer" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/kyuer/language-style-transfer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "\"Style Transfer from Non-Parallel Text by Cross-Alignment\". Tianxiao Shen, Tao Lei, Regina Barzilay, and Tommi Jaakkola. NIPS 2017." + } + repositories: { + url: "https://github.com/kaletap/language-style-transfer-pytorch" + framework: FRAMEWORK_TENSORFLOW + description: "Experiments to rewrite style transfer code from tensorflow to pytorch (not finished yet)" + } + repositories: { + url: "https://github.com/qfzhu/st" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + is_official: true + url: "https://github.com/shentianxiao/language-style-transfer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 495 + } + repositories: { + url: "https://github.com/mariob6/style_text" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + } + repositories: { + url: "https://github.com/sy-sunmoon/Clever-Commenter-Let-s-Try-More-Apps" + framework: FRAMEWORK_PYTORCH + description: "This project contains the Clever Commenter: Let's Try More Apps project in Google AI ML Winter Camp. 
by 赶论文ing" + } + repositories: { + url: "https://github.com/WhiskyChoy/language-style-transfer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Forked from https://github.com/shentianxiao/language-style-transfer" + } + repositories: { + url: "https://github.com/nlahlaf/Text-Style-Transfer" + framework: FRAMEWORK_TENSORFLOW + description: "Final Project for Deep Learning on Text Style Transfer" + } + } + video: { + video_id: "w-P2V2LlrHg" + video_title: "PR-059: Style Transfer from Non-Parallel Text by Cross-Alignment" + number_of_likes: 12 + number_of_views: 1050 + published_date: { + seconds: 1515977170 + } + uploader: "Suh Kiho" + } + } +} +pr_id_to_video: { + key: 60 + value: { + pr_id: 60 + papers: { + paper_id: "deep-learning-based-recommender-system-a" + title: "Deep Learning based Recommender System: A Survey and New Perspectives" + arxiv_id: "1707.07435" + abstract: "With the ever-growing volume of online information, recommender systems have been an effective strategy to overcome such information overload. The utility of recommender systems cannot be overstated, given its widespread adoption in many web applications, along with its potential impact to ameliorate many problems related to over-choice. In recent years, deep learning has garnered considerable interest in many research fields such as computer vision and natural language processing, owing not only to stellar performance but also the attractive property of learning feature representations from scratch. The influence of deep learning is also pervasive, recently demonstrating its effectiveness when applied to information retrieval and recommender systems research. Evidently, the field of deep learning in recommender system is flourishing. This article aims to provide a comprehensive review of recent research efforts on deep learning based recommender systems. More concretely, we provide and devise a taxonomy of deep learning based recommendation models, along with providing a comprehensive summary of the state-of-the-art. Finally, we expand on current trends and provide new perspectives pertaining to this new exciting development of the field." + pub_date: { + seconds: 1500854400 + } + authors: "Shuai Zhang" + authors: "Lina Yao" + authors: "Aixin Sun" + authors: "Yi Tay" + repositories: { + url: "https://github.com/YichenLin/MATH-80600A-Project" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/abmitra84/recommender_system" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/anuragreddygv323/Important-stuff" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } + repositories: { + url: "https://github.com/ginobaltazar7/DS" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Data Science, Deep Learning, Databases" + } + repositories: { + url: "https://github.com/DreamingRaven/Nemesyst" + framework: FRAMEWORK_OTHERS + number_of_stars: 13 + description: "Generalised and highly customisable, hybrid-parallelism, database based, deep learning framework." 
+ } + repositories: { + url: "https://github.com/ginobaltazar7/Data-Science" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Data Science, Deep Learning, Databases" + } + repositories: { + url: "https://github.com/sunhwan-lee/recommender_system" + framework: FRAMEWORK_TENSORFLOW + description: "Collection of codes and papers in the topic of recommender system" + } + } + video: { + video_id: "V6zixdCIOqw" + video_title: "PR-060: Deep Neural Networks for YouTube Recommendations" + number_of_likes: 49 + number_of_views: 4014 + published_date: { + seconds: 1516540254 + } + uploader: "keun bong Kwak" + } + } +} +pr_id_to_video: { + key: 61 + value: { + pr_id: 61 + papers: { + paper_id: "understanding-deep-learning-requires" + title: "Understanding deep learning requires rethinking generalization" + arxiv_id: "1611.03530" + abstract: "Despite their massive size, successful deep artificial neural networks can\nexhibit a remarkably small difference between training and test performance.\nConventional wisdom attributes small generalization error either to properties\nof the model family, or to the regularization techniques used during training.\n Through extensive systematic experiments, we show how these traditional\napproaches fail to explain why large neural networks generalize well in\npractice. Specifically, our experiments establish that state-of-the-art\nconvolutional networks for image classification trained with stochastic\ngradient methods easily fit a random labeling of the training data. This\nphenomenon is qualitatively unaffected by explicit regularization, and occurs\neven if we replace the true images by completely unstructured random noise. We\ncorroborate these experimental findings with a theoretical construction showing\nthat simple depth two neural networks already have perfect finite sample\nexpressivity as soon as the number of parameters exceeds the number of data\npoints as it usually does in practice.\n We interpret our experimental findings by comparison with traditional models." + pub_date: { + seconds: 1478736000 + } + authors: "Chiyuan Zhang" + authors: "Samy Bengio" + authors: "Moritz Hardt" + authors: "Benjamin Recht" + authors: "Oriol Vinyals" + repositories: { + url: "https://github.com/randyshee/TensorFlow-Projects" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/iwzy7071/graph_neural_network" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/2xic/notebooks" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "machine learning and computer vision is just algorithms and data structures with more fancy properties. be real about the hype" + } + repositories: { + url: "https://github.com/pluskid/fitting-random-labels" + framework: FRAMEWORK_PYTORCH + number_of_stars: 145 + description: "Example code for the paper \"Understanding deep learning requires rethinking generalization\"" + } + repositories: { + url: "https://github.com/aaronpeikert/methods-meetup" + framework: FRAMEWORK_OTHERS + number_of_stars: 10 + description: "Meeting of some friends to discuss methods, philosophy, stats, psychology and surrounding topics." 
+ } + repositories: { + url: "https://github.com/glouppe/info8010-deep-learning" + framework: FRAMEWORK_PYTORCH + number_of_stars: 666 + description: "Lectures for INFO8010 - Deep Learning, ULiège" + } + repositories: { + url: "https://github.com/jessemzhang/dl_spectral_normalization" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 10 + } + repositories: { + url: "https://github.com/KellyHwong/rethinking_generalization" + framework: FRAMEWORK_TENSORFLOW + description: "UNDERSTANDING DEEP LEARNING REQUIRES RETHINKING GENERALIZATION" + } + } + video: { + video_id: "UxJNG7ENRNg" + video_title: "PR-061: Understanding Deep Learning Requires Rethinking Generalization" + number_of_likes: 49 + number_of_views: 3238 + published_date: { + seconds: 1516543607 + } + uploader: "Jaejun Yoo" + } + } +} +pr_id_to_video: { + key: 62 + value: { + pr_id: 62 + papers: { + paper_id: "deep-learning-a-critical-appraisal" + title: "Deep Learning: A Critical Appraisal" + arxiv_id: "1801.00631" + abstract: "Although deep learning has historical roots going back decades, neither the\nterm \"deep learning\" nor the approach was popular just over five years ago,\nwhen the field was reignited by papers such as Krizhevsky, Sutskever and\nHinton's now classic (2012) deep network model of Imagenet. What has the field\ndiscovered in the five subsequent years? Against a background of considerable\nprogress in areas such as speech recognition, image recognition, and game\nplaying, and considerable enthusiasm in the popular press, I present ten\nconcerns for deep learning, and suggest that deep learning must be supplemented\nby other techniques if we are to reach artificial general intelligence." + pub_date: { + seconds: 1514851200 + } + authors: "Gary Marcus" + repositories: { + url: "https://github.com/astoycos/Mini_Project2" + framework: FRAMEWORK_TENSORFLOW + } + } + video: { + video_id: "6hg5d10SZr0" + video_title: "PR-062: Deep Learning: A Critical Appraisal (2018)" + number_of_likes: 56 + number_of_views: 3615 + published_date: { + seconds: 1517147263 + } + uploader: "Terry TaeWoong Um" + } + } +} +pr_id_to_video: { + key: 63 + value: { + pr_id: 63 + papers: { + paper_id: "peephole-predicting-network-performance" + title: "Peephole: Predicting Network Performance Before Training" + arxiv_id: "1712.03351" + abstract: "The quest for performant networks has been a significant force that drives\nthe advancements of deep learning in recent years. While rewarding, improving\nnetwork design has never been an easy journey. The large design space combined\nwith the tremendous cost required for network training poses a major obstacle\nto this endeavor. In this work, we propose a new approach to this problem,\nnamely, predicting the performance of a network before training, based on its\narchitecture. Specifically, we develop a unified way to encode individual\nlayers into vectors and bring them together to form an integrated description\nvia LSTM. Taking advantage of the recurrent network's strong expressive power,\nthis method can reliably predict the performances of various network\narchitectures. Our empirical studies showed that it not only achieved accurate\npredictions but also produced consistent rankings across datasets -- a key\ndesideratum in performance prediction." 
+ pub_date: { + seconds: 1512777600 + } + authors: "Boyang Deng" + authors: "Junjie Yan" + authors: "Dahua Lin" + methods: { + name: "Tanh Activation" + full_name: "Tanh Activation" + description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" + } + methods: { + name: "LSTM" + full_name: "Long Short-Term Memory" + description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + } + video: { + video_id: "ZO4bXgdcCQA" + video_title: "PR-063 Peephole: Predicting Network Performance Before Training" + number_of_likes: 5 + number_of_views: 778 + published_date: { + seconds: 1517147277 + } + uploader: "Taegyun Jeon" + } + } +} +pr_id_to_video: { + key: 64 + value: { + pr_id: 64 + papers: { + paper_id: "wide-deep-learning-for-recommender-systems" + title: "Wide & Deep Learning for Recommender Systems" + arxiv_id: "1606.07792" + abstract: "Generalized linear models with nonlinear feature transformations are widely\nused for large-scale regression and classification problems with sparse inputs.\nMemorization of feature interactions through a wide set of cross-product\nfeature transformations are effective and interpretable, while generalization\nrequires more feature engineering effort. With less feature engineering, deep\nneural networks can generalize better to unseen feature combinations through\nlow-dimensional dense embeddings learned for the sparse features. However, deep\nneural networks with embeddings can over-generalize and recommend less relevant\nitems when the user-item interactions are sparse and high-rank. In this paper,\nwe present Wide & Deep learning---jointly trained wide linear models and deep\nneural networks---to combine the benefits of memorization and generalization\nfor recommender systems. We productionized and evaluated the system on Google\nPlay, a commercial mobile app store with over one billion active users and over\none million apps. 
Online experiment results show that Wide & Deep significantly\nincreased app acquisitions compared with wide-only and deep-only models. We\nhave also open-sourced our implementation in TensorFlow." + pub_date: { + seconds: 1466726400 + } + authors: "Heng-Tze Cheng" + authors: "Levent Koc" + authors: "Jeremiah Harmsen" + authors: "Tal Shaked" + authors: "Tushar Chandra" + authors: "Hrishi Aradhye" + authors: "Glen Anderson" + authors: "Greg Corrado" + authors: "Wei Chai" + authors: "Mustafa Ispir" + authors: "Rohan Anil" + authors: "Zakaria Haque" + authors: "Lichan Hong" + authors: "Vihan Jain" + authors: "Xiaobing Liu" + authors: "Hemal Shah" + repositories: { + url: "https://github.com/shenweichen/DeepCTR" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5044 + description: "Easy-to-use,Modular and Extendible package of deep-learning based CTR models ." + } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleRec/tree/release/2.1.0/models/rank/wide_deep" + framework: FRAMEWORK_OTHERS + number_of_stars: 530 + description: "大规模推荐模型训练工具" + } + repositories: { + url: "https://github.com/fengtong-xiao/DMBGN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "The implementation of the submitted paper \"Deep Multi-Behaviors Graph Network for Voucher Redemption Rate Prediction\" in SIGKDD 2021 Applied Data Science Track." + } + repositories: { + url: "https://github.com/aivolcano/RecSys_tf2" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/NVIDIA/HugeCTR" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 432 + description: "HugeCTR is a high efficiency GPU framework designed for Click-Through-Rate (CTR) estimating training" + } + repositories: { + url: "https://github.com/jsleroux/Recommender-Systems" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/codlife/NLP" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/vinaymittal22/Income_Prediction_US" + framework: FRAMEWORK_OTHERS + description: "Adult data set solve for predict income of US population" + } + repositories: { + url: "https://github.com/yil479/yelp_review" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/sandeepnair2812/Deep-Learning-Based-Search-and-Recommendation-System" + framework: FRAMEWORK_TENSORFLOW + } + } + video: { + video_id: "hKoJPqWLrI4" + video_title: "PR-064: Wide&Deep Learning for Recommender Systems" + number_of_likes: 31 + number_of_views: 2140 + published_date: { + seconds: 1517749978 + } + uploader: "keun bong Kwak" + } + } +} +pr_id_to_video: { + key: 65 + value: { + pr_id: 65 + papers: { + paper_id: "high-resolution-image-synthesis-and-semantic" + title: "High-Resolution Image Synthesis and Semantic Manipulation with Conditional GANs" + arxiv_id: "1711.11585" + abstract: "We present a new method for synthesizing high-resolution photo-realistic\nimages from semantic label maps using conditional generative adversarial\nnetworks (conditional GANs). Conditional GANs have enabled a variety of\napplications, but the results are often limited to low-resolution and still far\nfrom realistic. In this work, we generate 2048x1024 visually appealing results\nwith a novel adversarial loss, as well as new multi-scale generator and\ndiscriminator architectures. 
Furthermore, we extend our framework to\ninteractive visual manipulation with two additional features. First, we\nincorporate object instance segmentation information, which enables object\nmanipulations such as removing/adding objects and changing the object category.\nSecond, we propose a method to generate diverse results given the same input,\nallowing users to edit the object appearance interactively. Human opinion\nstudies demonstrate that our method significantly outperforms existing methods,\nadvancing both the quality and the resolution of deep image synthesis and\nediting." + pub_date: { + seconds: 1512000000 + } + authors: "Ting-Chun Wang" + authors: "Ming-Yu Liu" + authors: "Jun-Yan Zhu" + authors: "Andrew Tao" + authors: "Jan Kautz" + authors: "Bryan Catanzaro" + repositories: { + url: "https://github.com/JeongHyunJin/Pix2PixHD" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + } + repositories: { + url: "https://github.com/ubc-vision/DwNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 19 + } + repositories: { + url: "https://github.com/haru-256/pix2pixHD.pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/SeniorDev009/ONNX-project" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + } + repositories: { + is_official: true + url: "https://github.com/NVIDIA/pix2pixHD" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5317 + description: "Synthesizing and manipulating 2048x1024 images with conditional GANs" + } + repositories: { + url: "https://github.com/rickyHong/pix2pixHD-repl" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/edricwu/Testing-1" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/mingyuliutw/UNIT" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1757 + description: "Unsupervised Image-to-Image Translation" + } + repositories: { + url: "https://github.com/wentao99/pix2pixHD" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/LiuNull/pix2pix_Liu" + framework: FRAMEWORK_PYTORCH + } + } + video: { + video_id: "_5ofbwltEKU" + video_title: "PR-065 : High-Resolution Image Synthesis and Semantic Manipulation with Conditional GANs" + number_of_likes: 16 + number_of_views: 1891 + published_date: { + seconds: 1517753318 + } + uploader: "이광희" + } + } +} +pr_id_to_video: { + key: 66 + value: { + pr_id: 66 + papers: { + paper_id: "dont-decay-the-learning-rate-increase-the" + title: "Don't Decay the Learning Rate, Increase the Batch Size" + arxiv_id: "1711.00489" + abstract: "It is common practice to decay the learning rate. Here we show one can\nusually obtain the same learning curve on both training and test sets by\ninstead increasing the batch size during training. This procedure is successful\nfor stochastic gradient descent (SGD), SGD with momentum, Nesterov momentum,\nand Adam. It reaches equivalent test accuracies after the same number of\ntraining epochs, but with fewer parameter updates, leading to greater\nparallelism and shorter training times. We can further reduce the number of\nparameter updates by increasing the learning rate $\\epsilon$ and scaling the\nbatch size $B \\propto \\epsilon$. Finally, one can increase the momentum\ncoefficient $m$ and scale $B \\propto 1/(1-m)$, although this tends to slightly\nreduce the test accuracy. 
Crucially, our techniques allow us to repurpose\nexisting training schedules for large batch training with no hyper-parameter\ntuning. We train ResNet-50 on ImageNet to $76.1\\%$ validation accuracy in under\n30 minutes." + pub_date: { + seconds: 1509494400 + } + authors: "Samuel L. Smith" + authors: "Pieter-Jan Kindermans" + authors: "Chris Ying" + authors: "Quoc V. Le" + repositories: { + url: "https://github.com/rbkim1990/capstone-age-estimation" + framework: FRAMEWORK_OTHERS + } + methods: { + name: "SGD" + full_name: "Stochastic Gradient Descent" + description: "**Stochastic Gradient Descent** is an iterative optimization technique that uses minibatches of data to form an expectation of the gradient, rather than the full gradient using all available data. That is for weights $w$ and a loss function $L$ we have:\r\n\r\n$$ w\\_{t+1} = w\\_{t} - \\eta\\hat{\\nabla}\\_{w}{L(w\\_{t})} $$\r\n\r\nWhere $\\eta$ is a learning rate. SGD reduces redundancy compared to batch gradient descent - which recomputes gradients for similar examples before each parameter update - so it is usually much faster.\r\n\r\n(Image Source: [here](http://rasbt.github.io/mlxtend/user_guide/general_concepts/gradient-optimization/))" + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + } + video: { + video_id: "jFpO-E4RPhQ" + video_title: "PR-066: Don't decay the learning rate, increase the batch size" + number_of_likes: 19 + number_of_views: 2360 + published_date: { + seconds: 1518357854 + } + uploader: "차준범" + } + } +} +pr_id_to_video: { + key: 67 + value: { + pr_id: 67 + papers: { + paper_id: "audio-super-resolution-using-neural-networks" + title: "Audio Super Resolution using Neural Networks" + arxiv_id: "1708.00853" + abstract: "We introduce a new audio processing technique that increases the sampling\nrate of signals such as speech or music using deep convolutional neural\nnetworks. Our model is trained on pairs of low and high-quality audio examples;\nat test-time, it predicts missing samples within a low-resolution signal in an\ninterpolation process similar to image super-resolution. Our method is simple\nand does not involve specialized audio processing techniques; in our\nexperiments, it outperforms baselines on standard speech and music benchmarks\nat upscaling ratios of 2x, 4x, and 6x. 
The method has practical applications in\ntelephony, compression, and text-to-speech generation; it demonstrates the\neffectiveness of feed-forward convolutional architectures on an audio\ngeneration task." + pub_date: { + seconds: 1501632000 + } + authors: "Volodymyr Kuleshov" + authors: "S. Zayd Enam" + authors: "Stefano Ermon" + repositories: { + url: "https://github.com/johnathanchiu/audio-upsampling" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Uses CNN to upsample low-res audio files" + } + repositories: { + url: "https://github.com/kuleshov/audio-super-res" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 546 + description: "Audio super resolution using neural networks" + } + repositories: { + url: "https://github.com/Amuzak-NTL/ASR-for-Speech-Recog" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } + repositories: { + url: "https://github.com/TrizteX/Audio-SuperRes" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Audio Super-Resolution performed on VCTK corpus" + } + } + video: { + video_id: "iqN08EPMjSs" + video_title: "PR-067: Audio Super Resolution using Neural Nets" + number_of_likes: 21 + number_of_views: 3366 + published_date: { + seconds: 1518357824 + } + uploader: "Seungil Kim" + } + } +} +pr_id_to_video: { + key: 68 + value: { + pr_id: 68 + papers: { + paper_id: "deepar-probabilistic-forecasting-with" + title: "DeepAR: Probabilistic Forecasting with Autoregressive Recurrent Networks" + arxiv_id: "1704.04110" + abstract: "Probabilistic forecasting, i.e. estimating the probability distribution of a\ntime series' future given its past, is a key enabler for optimizing business\nprocesses. In retail businesses, for example, forecasting demand is crucial for\nhaving the right inventory available at the right time at the right place. In\nthis paper we propose DeepAR, a methodology for producing accurate\nprobabilistic forecasts, based on training an auto regressive recurrent network\nmodel on a large number of related time series. We demonstrate how by applying\ndeep learning techniques to forecasting, one can overcome many of the\nchallenges faced by widely-used classical approaches to the problem. We show\nthrough extensive empirical evaluation on several real-world forecasting data\nsets accuracy improvements of around 15% compared to state-of-the-art methods." + pub_date: { + seconds: 1492041600 + } + authors: "David Salinas" + authors: "Valentin Flunkert" + authors: "Jan Gasthaus" + repositories: { + url: "https://github.com/kshmawj111/solar_energy_forecast" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/jdb78/pytorch-forecasting" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1132 + description: "Time series forecasting with PyTorch" + } + repositories: { + url: "https://github.com/bingblackbean/water_supply_network_pressure_pred_deepar" + framework: FRAMEWORK_OTHERS + number_of_stars: 9 + description: "use deepar to predict water supply network pressure " + } + repositories: { + url: "https://github.com/ensembles4612/product_demand_forecast_using_DeepAR_Amazon_SageMaker" + framework: FRAMEWORK_OTHERS + description: "I built a forecast tool using DeepAR (autoregressive RNN with LSTM cells) in Sagemaker that can predict the demand of hundreds of products simultaneously." 
+ } + repositories: { + url: "https://github.com/skp2/Electricity-Load" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Predict Electricity load from historical time series" + } + repositories: { + url: "https://github.com/Yonder-OSS/D3M-Primitives" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/Timbasa/Sample_GluonTS" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/nuankw/Summer-Research-2018-Part-One" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 + description: "PART I DeepAR implementation based on paper: https://arxiv.org/pdf/1704.04110.pdf" + } + repositories: { + url: "https://github.com/husnejahan/DeepAR-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 44 + } + repositories: { + url: "https://github.com/zhykoties/DeepAR" + framework: FRAMEWORK_PYTORCH + number_of_stars: 160 + description: "Implementation of deep learning models for time series in PyTorch." + } + } + video: { + video_id: "okyo61ZZivA" + video_title: "PR-068: DeepAR: Probabilistic Forecasting with Autoregressive Recurrent Networks" + number_of_likes: 23 + number_of_views: 3943 + published_date: { + seconds: 1519565309 + } + uploader: "Jiyang Kang" + } + } +} +pr_id_to_video: { + key: 69 + value: { + pr_id: 69 + papers: { + paper_id: "efficient-neural-architecture-search-via-1" + title: "Efficient Neural Architecture Search via Parameter Sharing" + arxiv_id: "1802.03268" + abstract: "We propose Efficient Neural Architecture Search (ENAS), a fast and\ninexpensive approach for automatic model design. In ENAS, a controller learns\nto discover neural network architectures by searching for an optimal subgraph\nwithin a large computational graph. The controller is trained with policy\ngradient to select a subgraph that maximizes the expected reward on the\nvalidation set. Meanwhile the model corresponding to the selected subgraph is\ntrained to minimize a canonical cross entropy loss. Thanks to parameter sharing\nbetween child models, ENAS is fast: it delivers strong empirical performances\nusing much fewer GPU-hours than all existing automatic model design approaches,\nand notably, 1000x less expensive than standard Neural Architecture Search. On\nthe Penn Treebank dataset, ENAS discovers a novel architecture that achieves a\ntest perplexity of 55.8, establishing a new state-of-the-art among all methods\nwithout post-training processing. On the CIFAR-10 dataset, ENAS designs novel\narchitectures that achieve a test error of 2.89%, which is on par with NASNet\n(Zoph et al., 2018), whose test error is 2.65%." + pub_date: { + seconds: 1518134400 + } + authors: "Hieu Pham" + authors: "Melody Y. Guan" + authors: "Barret Zoph" + authors: "Quoc V. Le" + authors: "Jeff Dean" + repositories: { + url: "https://github.com/distrue/enas_tensorflow" + framework: FRAMEWORK_TENSORFLOW + description: "Implementation of Multi-Objective reward based on ENAS backbone" + } + repositories: { + url: "https://github.com/guoyongcs/NATv2" + framework: FRAMEWORK_PYTORCH + number_of_stars: 20 + description: "Implementation for NATv2." 
+ } + repositories: { + url: "https://github.com/f51980280/ENAS-Implement" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "DeepLearning Systems and Inference Realization" + } + repositories: { + url: "https://github.com/nikitati/Nas.jl" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Programmable Neural Architecture Search" + } + repositories: { + url: "https://github.com/invisibleForce/ENAS-Pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "a pytorch implementation of ENAS " + } + repositories: { + url: "https://github.com/senthilva/Keras_functional_API_CNN" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/WillButAgain/ENAS" + framework: FRAMEWORK_PYTORCH + description: "scratch implementation of \"Efficient Neural Architecture Search via Parameter Sharing\" https://arxiv.org/pdf/1802.03268.pdf" + } + repositories: { + url: "https://github.com/melodyguan/enas" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1515 + description: "TensorFlow Code for paper \"Efficient Neural Architecture Search via Parameter Sharing\"" + } + repositories: { + url: "https://github.com/cshannonn/blackscholes_nas" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "Can a neural network learn Black Scholes, yes..." + } + repositories: { + url: "https://github.com/ahundt/enas" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 12 + description: "TensorFlow code for paper \"Training Frankenstein's Creature to Stack: HyperTree Architecture Search\"" + } + methods: { + name: "Tanh Activation" + full_name: "Tanh Activation" + description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "LSTM" + full_name: "Long Short-Term Memory" + description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. 
Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + } + video: { + video_id: "fbCcJaSQPPA" + video_title: "PR-069: Efficient Neural Architecture Search via Parameter Sharing" + number_of_likes: 44 + number_of_views: 4298 + published_date: { + seconds: 1520088191 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 70 + value: { + pr_id: 70 + papers: { + paper_id: "safetynets-verifiable-execution-of-deep" + title: "SafetyNets: Verifiable Execution of Deep Neural Networks on an Untrusted Cloud" + arxiv_id: "1706.10268" + abstract: "Inference using deep neural networks is often outsourced to the cloud since\nit is a computationally demanding task. However, this raises a fundamental\nissue of trust. How can a client be sure that the cloud has performed inference\ncorrectly? A lazy cloud provider might use a simpler but less accurate model to\nreduce its own computational load, or worse, maliciously modify the inference\nresults sent to the client. We propose SafetyNets, a framework that enables an\nuntrusted server (the cloud) to provide a client with a short mathematical\nproof of the correctness of inference tasks that they perform on behalf of the\nclient. Specifically, SafetyNets develops and implements a specialized\ninteractive proof (IP) protocol for verifiable execution of a class of deep\nneural networks, i.e., those that can be represented as arithmetic circuits.\nOur empirical results on three- and four-layer deep neural networks demonstrate\nthe run-time costs of SafetyNets for both the client and server are low.\nSafetyNets detects any incorrect computations of the neural network by the\nuntrusted server with high probability, while achieving state-of-the-art\naccuracy on the MNIST digit recognition (99.4%) and TIMIT speech recognition\ntasks (75.22%)." + pub_date: { + seconds: 1498780800 + } + authors: "Zahra Ghodsi" + authors: "Tianyu Gu" + authors: "Siddharth Garg" + } + video: { + video_id: "CtaPFqq8P00" + video_title: "PR-070: SafetyNets: Verifiable Execution of Deep Neural Networks on an Untrusted Cloud" + number_of_likes: 2 + number_of_views: 447 + published_date: { + seconds: 1520171150 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 71 + value: { + pr_id: 71 + papers: { + paper_id: "categorical-reparameterization-with-gumbel" + title: "Categorical Reparameterization with Gumbel-Softmax" + arxiv_id: "1611.01144" + abstract: "Categorical variables are a natural choice for representing discrete\nstructure in the world. However, stochastic neural networks rarely use\ncategorical latent variables due to the inability to backpropagate through\nsamples. 
In this work, we present an efficient gradient estimator that replaces\nthe non-differentiable sample from a categorical distribution with a\ndifferentiable sample from a novel Gumbel-Softmax distribution. This\ndistribution has the essential property that it can be smoothly annealed into a\ncategorical distribution. We show that our Gumbel-Softmax estimator outperforms\nstate-of-the-art gradient estimators on structured output prediction and\nunsupervised generative modeling tasks with categorical latent variables, and\nenables large speedups on semi-supervised classification." + pub_date: { + seconds: 1478131200 + } + authors: "Eric Jang" + authors: "Shixiang Gu" + authors: "Ben Poole" + repositories: { + url: "https://github.com/tensorflow/models/tree/master/research/rebar" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70344 + description: "Models and examples built with TensorFlow" + } + repositories: { + url: "https://github.com/karpathy/deep-vector-quantization" + framework: FRAMEWORK_PYTORCH + number_of_stars: 244 + description: "VQVAEs, GumbelSoftmaxes and friends" + } + repositories: { + url: "https://github.com/Jmkernes/PAR-Transformer-XL" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "An implementation of the Pay Attention when Required transformer: https://arxiv.org/pdf/2009.04534.pdf" + } + repositories: { + url: "https://github.com/EddieCunningham/GraphLSSM" + framework: FRAMEWORK_OTHERS + number_of_stars: 5 + description: "Graphical Latent State Space Models" + } + repositories: { + url: "https://github.com/stefanthaler/gumbel-softmax-exploration" + framework: FRAMEWORK_TENSORFLOW + description: "Exploration of the Gumbel Softmax Paper https://arxiv.org/pdf/1611.01144.pdf" + } + repositories: { + url: "https://github.com/kampta/pytorch-distributions" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "Basic VAE flow using pytorch distributions" + } + repositories: { + url: "https://github.com/OlivierAlgoet/Tensorflow2-GMM" + framework: FRAMEWORK_TENSORFLOW + description: "Gaussian mixture model" + } + repositories: { + url: "https://github.com/tensorflow/models" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70339 + description: "Models and examples built with TensorFlow" + } + repositories: { + url: "https://github.com/GuyLor/direct_vae" + framework: FRAMEWORK_PYTORCH + number_of_stars: 12 + description: "Implementation of the paper \"Direct Optimization through argmax for discrete Variational Auto-Encoder\"" + } + repositories: { + url: "https://github.com/crowdflowTUe/stampnet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "code for \"StampNet: unsupervised multi-class object discovery\" by Visser, Corbetta, Menkovski and Toschi (https://arxiv.org/abs/1902.02693)" + } + methods: { + name: "Gumbel Softmax" + full_name: "Gumbel Softmax" + description: "**Gumbel-Softmax** is a continuous distribution that has the property that it can be smoothly annealed into a categorical distribution, and whose parameter gradients can be easily computed via the reparameterization trick." 
+ } + } + video: { + video_id: "ty3SciyoIyk" + video_title: "PR-071: Categorical Reparameterization with Gumbel Softmax" + number_of_likes: 41 + number_of_views: 4268 + published_date: { + seconds: 1520172922 + } + uploader: "Jaejun Yoo" + } + } +} +pr_id_to_video: { + key: 72 + value: { + pr_id: 72 + papers: { + paper_id: "deep-compression-compressing-deep-neural" + title: "Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding" + arxiv_id: "1510.00149" + abstract: "Neural networks are both computationally intensive and memory intensive,\nmaking them difficult to deploy on embedded systems with limited hardware\nresources. To address this limitation, we introduce \"deep compression\", a three\nstage pipeline: pruning, trained quantization and Huffman coding, that work\ntogether to reduce the storage requirement of neural networks by 35x to 49x\nwithout affecting their accuracy. Our method first prunes the network by\nlearning only the important connections. Next, we quantize the weights to\nenforce weight sharing, finally, we apply Huffman coding. After the first two\nsteps we retrain the network to fine tune the remaining connections and the\nquantized centroids. Pruning, reduces the number of connections by 9x to 13x;\nQuantization then reduces the number of bits that represent each connection\nfrom 32 to 5. On the ImageNet dataset, our method reduced the storage required\nby AlexNet by 35x, from 240MB to 6.9MB, without loss of accuracy. Our method\nreduced the size of VGG-16 by 49x from 552MB to 11.3MB, again with no loss of\naccuracy. This allows fitting the model into on-chip SRAM cache rather than\noff-chip DRAM memory. Our compression method also facilitates the use of\ncomplex neural networks in mobile applications where application size and\ndownload bandwidth are constrained. Benchmarked on CPU, GPU and mobile GPU,\ncompressed network has 3x to 4x layerwise speedup and 3x to 7x better energy\nefficiency." + pub_date: { + seconds: 1443657600 + } + authors: "Song Han" + authors: "Huizi Mao" + authors: "William J. Dally" + repositories: { + url: "https://github.com/songhan/Deep-Compression-AlexNet" + framework: FRAMEWORK_OTHERS + number_of_stars: 571 + description: "Deep Compression on AlexNet" + } + repositories: { + url: "https://github.com/heguixiang/caffe_deep_compression" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/may0324/DeepCompression-caffe" + framework: FRAMEWORK_OTHERS + number_of_stars: 234 + description: "Caffe for Deep Compression" + } + repositories: { + url: "https://github.com/bemova/Deep-Compression-Compressing-Deep-Neural-Networks-with-Pruning-Trained-Quantization-and-Huffman" + framework: FRAMEWORK_PYTORCH + number_of_stars: 14 + description: "It is a pytorch implementation of https://arxiv.org/abs/1510.00149 paper." + } + repositories: { + url: "https://github.com/songhan/SqueezeNet-Deep-Compression" + framework: FRAMEWORK_OTHERS + number_of_stars: 398 + } + repositories: { + url: "https://github.com/isha-garg/Deep_Compression" + framework: FRAMEWORK_OTHERS + number_of_stars: 8 + description: "Recreated deep compression's pruning, quantization and huffman encoding pipeline" + } + repositories: { + url: "https://github.com/KarenUllrich/Tutorial_BayesianCompressionForDL" + framework: FRAMEWORK_PYTORCH + number_of_stars: 183 + description: "A tutorial on \"Bayesian Compression for Deep Learning\" published at NIPS (2017)." 
+ } + repositories: { + url: "https://github.com/cambridge-mlg/variational-shannon-coding" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 15 + description: "This repository contains the code for our recent paper `Minimal Random Code Learning: Getting Bits Back from Compressed Model Parameters'" + } + repositories: { + url: "https://github.com/cambridge-mlg/miracle" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 15 + description: "This repository contains the code for our recent paper `Minimal Random Code Learning: Getting Bits Back from Compressed Model Parameters'" + } + repositories: { + url: "https://github.com/lovepan1/caffe_ssd_traffic" + framework: FRAMEWORK_OTHERS + description: " Updated a minute ago used ssd by caffe in transportation object detection , included car bus minbus persopn minibus bicycle." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "AlexNet" + full_name: "AlexNet" + description: "**AlexNet** is a classic convolutional neural network architecture. It consists of convolutions, max pooling and dense layers as the basic building blocks. Grouped convolutions are used in order to fit the model across two GPUs." + } + methods: { + name: "VGG-16" + full_name: "VGG-16" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Local Response Normalization" + full_name: "Local Response Normalization" + description: "**Local Response Normalization** is a normalization layer that implements the idea of lateral inhibition. Lateral inhibition is a concept in neurobiology that refers to the phenomenon of an excited neuron inhibiting its neighbours: this leads to a peak in the form of a local maximum, creating contrast in that area and increasing sensory perception. 
In practice, we can either normalize within the same channel or normalize across channels when we apply LRN to convolutional neural networks.\r\n\r\n$$ b_{c} = a_{c}\\left(k + \\frac{\\alpha}{n}\\sum_{c'=\\max(0, c-n/2)}^{\\min(N-1,c+n/2)}a_{c'}^2\\right)^{-\\beta} $$\r\n\r\nWhere the size is the number of neighbouring channels used for normalization, $\\alpha$ is multiplicative factor, $\\beta$ an exponent and $k$ an additive factor" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." 
+ } + } + video: { + video_id: "9mFZmpIbMDs" + video_title: "PR-072: Deep Compression" + number_of_likes: 27 + number_of_views: 2167 + published_date: { + seconds: 1520777304 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 73 + value: { + pr_id: 73 + papers: { + paper_id: "generative-semantic-manipulation-with" + title: "Generative Semantic Manipulation with Contrasting GAN" + arxiv_id: "1708.00315" + abstract: "Generative Adversarial Networks (GANs) have recently achieved significant\nimprovement on paired/unpaired image-to-image translation, such as\nphoto$\\rightarrow$ sketch and artist painting style transfer. However, existing\nmodels can only be capable of transferring the low-level information (e.g.\ncolor or texture changes), but fail to edit high-level semantic meanings (e.g.,\ngeometric structure or content) of objects. On the other hand, while some\nresearches can synthesize compelling real-world images given a class label or\ncaption, they cannot condition on arbitrary shapes or structures, which largely\nlimits their application scenarios and interpretive capability of model\nresults. In this work, we focus on a more challenging semantic manipulation\ntask, which aims to modify the semantic meaning of an object while preserving\nits own characteristics (e.g. viewpoints and shapes), such as\ncow$\\rightarrow$sheep, motor$\\rightarrow$ bicycle, cat$\\rightarrow$dog. To\ntackle such large semantic changes, we introduce a contrasting GAN\n(contrast-GAN) with a novel adversarial contrasting objective. Instead of\ndirectly making the synthesized samples close to target data as previous GANs\ndid, our adversarial contrasting objective optimizes over the distance\ncomparisons between samples, that is, enforcing the manipulated data be\nsemantically closer to the real data with target category than the input data.\nEquipped with the new contrasting objective, a novel mask-conditional\ncontrast-GAN architecture is proposed to enable disentangle image background\nwith object semantic changes. Experiments on several semantic manipulation\ntasks on ImageNet and MSCOCO dataset show considerable performance gain by our\ncontrast-GAN over other conditional GANs. Quantitative results further\ndemonstrate the superiority of our model on generating manipulated results with\nhigh visual fidelity and reasonable object semantics." + pub_date: { + seconds: 1501545600 + } + authors: "Xiaodan Liang" + authors: "Hao Zhang" + authors: "Eric P. Xing" + } + video: { + video_id: "U8IpNf1b57w" + video_title: "PR-073: Generative Semantic Manipulation with Contrasting GAN" + number_of_likes: 4 + number_of_views: 757 + published_date: { + seconds: 1520778031 + } + uploader: "이광희" + } + } +} +pr_id_to_video: { + key: 74 + value: { + pr_id: 74 + papers: { + paper_id: "obamanet-photo-realistic-lip-sync-from-text" + title: "ObamaNet: Photo-realistic lip-sync from text" + arxiv_id: "1801.01442" + abstract: "We present ObamaNet, the first architecture that generates both audio and\nsynchronized photo-realistic lip-sync videos from any new text. Contrary to\nother published lip-sync approaches, ours is only composed of fully trainable\nneural modules and does not rely on any traditional computer graphics methods.\nMore precisely, we use three main modules: a text-to-speech network based on\nChar2Wav, a time-delayed LSTM to generate mouth-keypoints synced to the audio,\nand a network based on Pix2Pix to generate the video frames conditioned on the\nkeypoints." 
+ pub_date: { + seconds: 1512518400 + } + authors: "Rithesh Kumar" + authors: "Jose Sotelo" + authors: "Kundan Kumar" + authors: "Alexandre de Brebisson" + authors: "Yoshua Bengio" + repositories: { + url: "https://github.com/ung200/thats-what-obama-said" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 18 + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Tanh Activation" + full_name: "Tanh Activation" + description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. 
But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" + } + methods: { + name: "Pix2Pix" + full_name: "Pix2Pix" + description: "**Pix2Pix** is a conditional image-to-image translation architecture that uses a conditional GAN objective combined with a reconstruction loss. The conditional GAN objective for observed images $x$, output images $y$ and the random noise vector $z$ is:\r\n\r\n$$ \\mathcal{L}\\_{cGAN}\\left(G, D\\right) =\\mathbb{E}\\_{x,y}\\left[\\log D\\left(x, y\\right)\\right]+\r\n\\mathbb{E}\\_{x,z}\\left[log(1 − D\\left(x, G\\left(x, z\\right)\\right)\\right] $$\r\n\r\nWe augment this with a reconstruction term:\r\n\r\n$$ \\mathcal{L}\\_{L1}\\left(G\\right) = \\mathbb{E}\\_{x,y,z}\\left[||y - G\\left(x, z\\right)||\\_{1}\\right] $$\r\n\r\nand we get the final objective as:\r\n\r\n$$ G^{*} = \\arg\\min\\_{G}\\max\\_{D}\\mathcal{L}\\_{cGAN}\\left(G, D\\right) + \\lambda\\mathcal{L}\\_{L1}\\left(G\\right) $$\r\n\r\nThe architectures employed for the generator and discriminator closely follow [DCGAN](https://paperswithcode.com/method/dcgan), with a few modifications:\r\n\r\n- Concatenated skip connections are used to \"shuttle\" low-level information between the input and output, similar to a [U-Net](https://paperswithcode.com/method/u-net).\r\n- The use of a PatchGAN discriminator that only penalizes structure at the scale of patches." + } + methods: { + name: "LSTM" + full_name: "Long Short-Term Memory" + description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + methods: { + name: "Leaky ReLU" + full_name: "Leaky ReLU" + description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we we may suffer from sparse gradients, for example training generative adversarial networks." + } + methods: { + name: "PatchGAN" + full_name: "PatchGAN" + description: "**PatchGAN** is a type of discriminator for generative adversarial networks which only penalizes structure at the scale of local image patches. 
The PatchGAN discriminator tries to classify if each $N \\times N$ patch in an image is real or fake. This discriminator is run convolutionally across the image, averaging all responses to provide the ultimate output of $D$. Such a discriminator effectively models the image as a Markov random field, assuming independence between pixels separated by more than a patch diameter. It can be understood as a type of texture/style loss." + } + methods: { + name: "Concatenated Skip Connection" + full_name: "Concatenated Skip Connection" + description: "A **Concatenated Skip Connection** is a type of skip connection that seeks to reuse features by concatenating them to new layers, allowing more information to be retained from previous layers of the network. This contrasts with say, residual connections, where element-wise summation is used instead to incorporate information from previous layers. This type of skip connection is prominently used in DenseNets (and also Inception networks), which the Figure to the right illustrates." + } + } + video: { + video_id: "A1o6SUsWd98" + video_title: "PR-074: ObamaNet: Photo-realistic lip-sync from text" + number_of_views: 2009 + published_date: { + seconds: 1521381942 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 75 + value: { + pr_id: 75 + papers: { + paper_id: "on-calibration-of-modern-neural-networks" + title: "On Calibration of Modern Neural Networks" + arxiv_id: "1706.04599" + abstract: "Confidence calibration -- the problem of predicting probability estimates\nrepresentative of the true correctness likelihood -- is important for\nclassification models in many applications. We discover that modern neural\nnetworks, unlike those from a decade ago, are poorly calibrated. Through\nextensive experiments, we observe that depth, width, weight decay, and Batch\nNormalization are important factors influencing calibration. We evaluate the\nperformance of various post-processing calibration methods on state-of-the-art\narchitectures with image and document classification datasets. Our analysis and\nexperiments not only offer insights into neural network learning, but also\nprovide a simple and straightforward recipe for practical settings: on most\ndatasets, temperature scaling -- a single-parameter variant of Platt Scaling --\nis surprisingly effective at calibrating predictions." + pub_date: { + seconds: 1497398400 + } + authors: "Chuan Guo" + authors: "Geoff Pleiss" + authors: "Yu Sun" + authors: "Kilian Q. Weinberger" + repositories: { + url: "https://github.com/sleep3r/garrus" + framework: FRAMEWORK_OTHERS + number_of_stars: 13 + description: "Python framework for high quality confidence estimation of deep neural networks, providing methods such as confidence calibration and ordinal ranking" + } + repositories: { + url: "https://github.com/bayesgroup/pytorch-ensembles" + framework: FRAMEWORK_PYTORCH + number_of_stars: 140 + description: "Pitfalls of In-Domain Uncertainty Estimation and Ensembling in Deep Learning, ICLR 2020" + } + repositories: { + url: "https://github.com/artnitolog/diary" + framework: FRAMEWORK_OTHERS + description: "Accompanying repository for the 3rd year corsework. CMC MSU, MMF, 2020-2021." + } + repositories: { + url: "https://github.com/johntd54/stanford_car" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Classification model for fine-grained visual classification on the Stanford Car dataset." 
+ } + repositories: { + is_official: true + url: "https://github.com/gpleiss/temperature_scaling" + framework: FRAMEWORK_PYTORCH + number_of_stars: 551 + description: "A simple way to calibrate your neural network." + } + repositories: { + url: "https://github.com/AnanyaKumar/verified_calibration" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 58 + description: "Calibration library and code for the paper: Verified Uncertainty Calibration. Ananya Kumar, Percy Liang, Tengyu Ma. NeurIPS 2019 (Spotlight)." + } + repositories: { + url: "https://github.com/Andreas12321/Est-Cert-Final" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/Jonathan-Pearce/calibration_library" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "Pytorch library for model calibration metrics and visualizations as well as recalibration methods. In progress!" + } + repositories: { + url: "https://github.com/Eric-Wallace/deep-knn" + framework: FRAMEWORK_PYTORCH + number_of_stars: 32 + description: "Code for the 2018 EMNLP Interpretability Workshop Paper \"Interpreting Neural Networks with Nearest Neighbors\"" + } + repositories: { + url: "https://github.com/Jonathan-Pearce/cnn_calibration" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "Pytorch library for model calibration metrics and visualizations as well as recalibration methods. In progress!" + } + } + video: { + video_id: "odNHEkfJAc4" + video_title: "PR-075: On Calibration of Modern Neural Networks (2017)" + number_of_likes: 27 + number_of_views: 2882 + published_date: { + seconds: 1521987100 + } + uploader: "Terry TaeWoong Um" + } + } +} +pr_id_to_video: { + key: 76 + value: { + pr_id: 76 + papers: { + paper_id: "distributed-representations-of-sentences-and" + title: "Distributed Representations of Sentences and Documents" + arxiv_id: "1405.4053" + abstract: "Many machine learning algorithms require the input to be represented as a\nfixed-length feature vector. When it comes to texts, one of the most common\nfixed-length features is bag-of-words. Despite their popularity, bag-of-words\nfeatures have two major weaknesses: they lose the ordering of the words and\nthey also ignore semantics of the words. For example, \"powerful,\" \"strong\" and\n\"Paris\" are equally distant. In this paper, we propose Paragraph Vector, an\nunsupervised algorithm that learns fixed-length feature representations from\nvariable-length pieces of texts, such as sentences, paragraphs, and documents.\nOur algorithm represents each document by a dense vector which is trained to\npredict words in the document. Its construction gives our algorithm the\npotential to overcome the weaknesses of bag-of-words models. Empirical results\nshow that Paragraph Vectors outperform bag-of-words models as well as other\ntechniques for text representations. Finally, we achieve new state-of-the-art\nresults on several text classification and sentiment analysis tasks." + pub_date: { + seconds: 1400198400 + } + authors: "Quoc V. 
Le" + authors: "Tomas Mikolov" + repositories: { + url: "https://github.com/Antonildo43/Classifica-o-de-textos-com-doc2Vec" + framework: FRAMEWORK_OTHERS + description: "Classificação de Documentos com doc2Vec" + } + repositories: { + url: "https://github.com/jimmy6727/Informd" + framework: FRAMEWORK_TENSORFLOW + description: "Project repo for Mozilla Spring Incubator Lab 2020 Project " + } + repositories: { + url: "https://github.com/wiflore/IBM_Articles_Recomender" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/hithisisdhara/doc2vec" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/kr900910/supreme_court_opinion" + framework: FRAMEWORK_TENSORFLOW + description: "Predicting party of author for different supreme court opinions based on natural language features" + } + repositories: { + url: "https://github.com/dhyeon/ingredient-vectors" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/kinimod23/NMT_Project" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/ibrahimsharaf/doc2vec" + framework: FRAMEWORK_OTHERS + number_of_stars: 93 + description: ":notebook: Long(er) text representation and classification using Doc2Vec embeddings" + } + repositories: { + url: "https://github.com/tsandefer/capstone_2" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "Doc2Vec and Annotated Lyrics: Are they \"Genius\"? (DSI Capstone II Project)" + } + repositories: { + url: "https://github.com/bombdiggity/paper-bag" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + } + video: { + video_id: "NxKpgY6sWOQ" + video_title: "PR-076: Distributed Representations of Sentences and Documents" + number_of_likes: 18 + number_of_views: 1746 + published_date: { + seconds: 1522587607 + } + uploader: "keun bong Kwak" + } + } +} +pr_id_to_video: { + key: 77 + value: { + pr_id: 77 + papers: { + paper_id: "seqgan-sequence-generative-adversarial-nets" + title: "SeqGAN: Sequence Generative Adversarial Nets with Policy Gradient" + arxiv_id: "1609.05473" + abstract: "As a new way of training generative models, Generative Adversarial Nets (GAN)\nthat uses a discriminative model to guide the training of the generative model\nhas enjoyed considerable success in generating real-valued data. However, it\nhas limitations when the goal is for generating sequences of discrete tokens. A\nmajor reason lies in that the discrete outputs from the generative model make\nit difficult to pass the gradient update from the discriminative model to the\ngenerative model. Also, the discriminative model can only assess a complete\nsequence, while for a partially generated sequence, it is non-trivial to\nbalance its current score and the future one once the entire sequence has been\ngenerated. In this paper, we propose a sequence generation framework, called\nSeqGAN, to solve the problems. Modeling the data generator as a stochastic\npolicy in reinforcement learning (RL), SeqGAN bypasses the generator\ndifferentiation problem by directly performing gradient policy update. The RL\nreward signal comes from the GAN discriminator judged on a complete sequence,\nand is passed back to the intermediate state-action steps using Monte Carlo\nsearch. Extensive experiments on synthetic data and real-world tasks\ndemonstrate significant improvements over strong baselines." 
+ pub_date: { + seconds: 1474156800 + } + authors: "Lantao Yu" + authors: "Weinan Zhang" + authors: "Jun Wang" + authors: "Yong Yu" + repositories: { + url: "https://github.com/lina2360/HiSeqGan" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/willspag/SeqGan" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Attempt at Tensorflow 2.3 version of Sequence Gan" + } + repositories: { + url: "https://github.com/desire2020/RankGAN" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 24 + description: "Implementation of Adversarial Ranking for Language Generation [ArxiV 1705.11001]" + } + repositories: { + url: "https://github.com/medtray/SeqGAN-vs-MLE-vs-PG-BLEU-vs-ScheduleSampling" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/chaneeh/SeqGAN_experiment" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/LiangqunLu/DLForChatbot" + framework: FRAMEWORK_OTHERS + description: "Deep learning for chatbot" + } + repositories: { + url: "https://github.com/yuanfeisiyuetian/seqgan-modbusTCP" + framework: FRAMEWORK_TENSORFLOW + description: "使用seqgan进行ModbusTCP协议的模糊测试" + } + repositories: { + is_official: true + url: "https://github.com/LantaoYu/SeqGAN" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1915 + description: "Implementation of Sequence Generative Adversarial Nets with Policy Gradient" + } + repositories: { + url: "https://github.com/suhoy901/SeqGAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "pytorch SeqGAN" + } + repositories: { + url: "https://github.com/bgenchel/MusicalSeqGAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Adapt and evaluate SeqGAN for music. Developed in PyTorch, using https://github.com/ZiJianZhao/SeqGAN-PyTorch as a base" + } + methods: { + name: "GAN" + full_name: "Generative Adversarial Network" + description: "A **GAN**, or **Generative Adversarial Network**, is a generative model that simultaneously trains\r\ntwo models: a generative model $G$ that captures the data distribution, and a discriminative model $D$ that estimates the\r\nprobability that a sample came from the training data rather than $G$.\r\n\r\nThe training procedure for $G$ is to maximize the probability of $D$ making\r\na mistake. This framework corresponds to a minimax two-player game. In the\r\nspace of arbitrary functions $G$ and $D$, a unique solution exists, with $G$\r\nrecovering the training data distribution and $D$ equal to $\\frac{1}{2}$\r\neverywhere. In the case where $G$ and $D$ are defined by multilayer perceptrons,\r\nthe entire system can be trained with backpropagation. 
\r\n\r\n(Image Source: [here](http://www.kdnuggets.com/2017/01/generative-adversarial-networks-hot-topic-machine-learning.html))" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + } + video: { + video_id: "BXODIP3QjJI" + video_title: "PR-077: SeqGAN: Sequence Generative Adversarial Nets with Policy Gradient" + number_of_likes: 15 + number_of_views: 2084 + published_date: { + seconds: 1523239176 + } + uploader: "Suh Kiho" + } + } +} +pr_id_to_video: { + key: 78 + value: { + pr_id: 78 + papers: { + paper_id: "net2net-accelerating-learning-via-knowledge" + title: "Net2Net: Accelerating Learning via Knowledge Transfer" + arxiv_id: "1511.05641" + abstract: "We introduce techniques for rapidly transferring the information stored in\none neural net into another neural net. The main purpose is to accelerate the\ntraining of a significantly larger neural net. During real-world workflows, one\noften trains very many different neural networks during the experimentation and\ndesign process. This is a wasteful process in which each new model is trained\nfrom scratch. Our Net2Net technique accelerates the experimentation process by\ninstantaneously transferring the knowledge from a previous network to each new\ndeeper or wider network. Our techniques are based on the concept of\nfunction-preserving transformations between neural network specifications. This\ndiffers from previous approaches to pre-training that altered the function\nrepresented by a neural net when adding layers to it. Using our knowledge\ntransfer mechanism to add depth to Inception modules, we demonstrate a new\nstate of the art accuracy rating on the ImageNet dataset." + pub_date: { + seconds: 1447804800 + } + authors: "Tianqi Chen" + authors: "Ian Goodfellow" + authors: "Jonathon Shlens" + repositories: { + url: "https://github.com/hxtruong/net2net" + framework: FRAMEWORK_TENSORFLOW + description: "Library to increasing size of model. Wider and Deeper any layer of model." + } + repositories: { + url: "https://github.com/agongt408/vbranch" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "vbranch is a deep-learning framework to improve the accuracy and efficiency of neural networks by sharing parameters between multiple branches." + } + repositories: { + url: "https://github.com/soumith/net2net.torch" + framework: FRAMEWORK_OTHERS + number_of_stars: 153 + description: "Implementation of http://arxiv.org/abs/1511.05641 that lets one build a larger net starting from a smaller one." 
+ } + } + video: { + video_id: "btsZOMsyH_o" + video_title: "PR-078: Net2Net: Accelerating Learning via Knowledge Transfer" + number_of_likes: 14 + number_of_views: 1001 + published_date: { + seconds: 1523878774 + } + uploader: "차준범" + } + } +} +pr_id_to_video: { + key: 79 + value: { + pr_id: 79 + papers: { + paper_id: "adversarial-audio-synthesis" + title: "Adversarial Audio Synthesis" + arxiv_id: "1802.04208" + abstract: "Audio signals are sampled at high temporal resolutions, and learning to\nsynthesize audio requires capturing structure across a range of timescales.\nGenerative adversarial networks (GANs) have seen wide success at generating\nimages that are both locally and globally coherent, but they have seen little\napplication to audio generation. In this paper we introduce WaveGAN, a first\nattempt at applying GANs to unsupervised synthesis of raw-waveform audio.\nWaveGAN is capable of synthesizing one second slices of audio waveforms with\nglobal coherence, suitable for sound effect generation. Our experiments\ndemonstrate that, without labels, WaveGAN learns to produce intelligible words\nwhen trained on a small-vocabulary speech dataset, and can also synthesize\naudio from other domains such as drums, bird vocalizations, and piano. We\ncompare WaveGAN to a method which applies GANs designed for image generation on\nimage-like audio feature representations, finding both approaches to be\npromising." + pub_date: { + seconds: 1518393600 + } + authors: "Chris Donahue" + authors: "Julian McAuley" + authors: "Miller Puckette" + repositories: { + url: "https://github.com/zassou65535/WaveGAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "WaveGANによる音声生成器" + } + repositories: { + url: "https://github.com/mahotani/ADVERSARIAL-AUDIO-SYNTHESIS" + framework: FRAMEWORK_OTHERS + description: "ICLR2019で採択されたADVERSARIAL AUDIO SYNTHESISを読んだメモ的なもの" + } + repositories: { + url: "https://github.com/MaxHolmberg96/WaveGAN" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Implementation of the paper https://arxiv.org/pdf/1802.04208.pdf" + } + repositories: { + url: "https://github.com/Yotsuyubi/wave-nr-gan" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/fromme0528/pytorch-WaveGAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "A pytorch implementation of WaveGAN" + } + repositories: { + url: "https://github.com/MurreyCode/wavegan" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "WaveGAN: using GANs to synthesize raw audio" + } + repositories: { + url: "https://github.com/IBM/MAX-Audio-Sample-Generator" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 10 + description: "Generate short audio clips of speech commands and lo-fi instrumental samples" + } + repositories: { + url: "https://github.com/LEChaney/AudioStyleGAN" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/ShaunBarry/wavegan" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/mostafaelaraby/wavegan-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 43 + description: "Pytorch Implementation of wavegan model to generate audio " + } + methods: { + name: "Leaky ReLU" + full_name: "Leaky ReLU" + description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope 
for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we may suffer from sparse gradients, for example training generative adversarial networks." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\_{\text{inputs}}*n\_{\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\_{l} = g\left(\textbf{W}^{T}h\_{l-1}\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "DCGAN" + full_name: "Deep Convolutional GAN" + description: "**DCGAN**, or **Deep Convolutional GAN**, is a generative adversarial network architecture. It uses a couple of guidelines, in particular:\r\n\r\n- Replacing any pooling layers with strided convolutions (discriminator) and fractional-strided convolutions (generator).\r\n- Using batchnorm in both the generator and the discriminator.\r\n- Removing fully connected hidden layers for deeper architectures.\r\n- Using ReLU activation in the generator for all layers except for the output, which uses tanh.\r\n- Using LeakyReLU activation in the discriminator for all layers." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "WGAN-GP Loss" + full_name: "WGAN-GP Loss" + description: "**Wasserstein Gradient Penalty Loss**, or **WGAN-GP Loss**, is a loss used for generative adversarial networks that augments the Wasserstein loss with a gradient norm penalty for random samples $\mathbf{\hat{x}} \sim \mathbb{P}\_{\hat{\mathbf{x}}}$ to achieve Lipschitz continuity:\r\n\r\n$$ L = \mathbb{E}\_{\tilde{\mathbf{x}} \sim \mathbb{P}\_{g}}\left[D\left(\tilde{\mathbf{x}}\right)\right] - \mathbb{E}\_{\mathbf{x} \sim \mathbb{P}\_{r}}\left[D\left(\mathbf{x}\right)\right] + \lambda\mathbb{E}\_{\mathbf{\hat{x}} \sim \mathbb{P}\_{\hat{\mathbf{x}}}}\left[\left(||\nabla\_{\hat{\mathbf{x}}}D\left(\mathbf{\hat{x}}\right)||\_{2}-1\right)^{2}\right]$$\r\n\r\nIt was introduced as part of the [WGAN-GP](https://paperswithcode.com/method/wgan-gp) overall model." + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD with Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients.
\r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + methods: { + name: "SpecGAN" + full_name: "SpecGAN" + description: "**SpecGAN** is a generative adversarial network method for spectrogram-based, frequency-domain audio generation. The problem is suited for GANs designed for image generation. The model can be approximately inverted. \r\n\r\nTo process audio into suitable spectrograms, the authors perform the short-time Fourier transform with 16 ms windows and 8ms stride, resulting in 128 frequency bins, linearly spaced from 0 to 8 kHz. They take the magnitude of the resultant spectra and scale amplitude values logarithmically to better-align with human perception. They then normalize each frequency bin to have zero mean and unit variance. They clip the spectra to $3$ standard deviations and rescale to $\\left[−1, 1\\right]$.\r\n\r\nThey then use the DCGAN approach on the result spectra." + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. 
Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + } + video: { + video_id: "UXVKSSXdwb8" + video_title: "PR-079: Synthesizing Audio with Generative Adversarial Networks" + number_of_likes: 20 + number_of_views: 1303 + published_date: { + seconds: 1523206394 + } + uploader: "Seungil Kim" + } + } +} +pr_id_to_video: { + key: 80 + value: { + pr_id: 80 + papers: { + paper_id: "practical-bayesian-optimization-of-machine" + title: "Practical Bayesian Optimization of Machine Learning Algorithms" + arxiv_id: "1206.2944" + abstract: "Machine learning algorithms frequently require careful tuning of model\nhyperparameters, regularization terms, and optimization parameters.\nUnfortunately, this tuning is often a \"black art\" that requires expert\nexperience, unwritten rules of thumb, or sometimes brute-force search. Much\nmore appealing is the idea of developing automatic approaches which can\noptimize the performance of a given learning algorithm to the task at hand. In\nthis work, we consider the automatic tuning problem within the framework of\nBayesian optimization, in which a learning algorithm's generalization\nperformance is modeled as a sample from a Gaussian process (GP). The tractable\nposterior distribution induced by the GP leads to efficient use of the\ninformation gathered by previous experiments, enabling optimal choices about\nwhat parameters to try next. Here we show how the effects of the Gaussian\nprocess prior and the associated inference procedure can have a large impact on\nthe success or failure of Bayesian optimization. We show that thoughtful\nchoices can lead to results that exceed expert-level performance in tuning\nmachine learning algorithms. We also describe new algorithms that take into\naccount the variable cost (duration) of learning experiments and that can\nleverage the presence of multiple cores for parallel experimentation. We show\nthat these proposed algorithms improve on previous automatic procedures and can\nreach or surpass human expert-level optimization on a diverse set of\ncontemporary algorithms including latent Dirichlet allocation, structured SVMs\nand convolutional neural networks." + pub_date: { + seconds: 1339545600 + } + authors: "Jasper Snoek" + authors: "Hugo Larochelle" + authors: "Ryan P. Adams" + repositories: { + url: "https://github.com/c-bata/goptuna" + framework: FRAMEWORK_OTHERS + number_of_stars: 180 + description: "A hyperparameter optimization framework, inspired by Optuna." + } + repositories: { + url: "https://github.com/JasperSnoek/spearmint" + framework: FRAMEWORK_OTHERS + number_of_stars: 1362 + description: "Spearmint is a package to perform Bayesian optimization according to the algorithms outlined in the paper: Practical Bayesian Optimization of Machine Learning Algorithms. Jasper Snoek, Hugo Larochelle and Ryan P. Adams. 
Advances in Neural Information Processing Systems, 2012 " + } + repositories: { + url: "https://github.com/HIPS/Spearmint" + framework: FRAMEWORK_OTHERS + number_of_stars: 1430 + description: "Spearmint Bayesian optimization codebase" + } + repositories: { + url: "https://github.com/Argaadya/intro-bayesian" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "This is an introduction for Bayesian optimization" + } + methods: { + name: "Gaussian Process" + full_name: "Gaussian Process" + description: "**Gaussian Processes** are non-parametric models for approximating functions. They rely upon a measure of similarity between points (the kernel function) to predict the value for an unseen point from training data. The models are fully probabilistic so uncertainty bounds are baked in with the model.\r\n\r\nImage Source: Gaussian Processes for Machine Learning, C. E. Rasmussen & C. K. I. Williams" + } + } + video: { + video_id: "MnHCe8tGjQ8" + video_title: "PR-080: Practical Bayesian Optimization of Machine Learning Algorithms" + number_of_likes: 26 + number_of_views: 2378 + published_date: { + seconds: 1523799259 + } + uploader: "Jiyang Kang" + } + } +} +pr_id_to_video: { + key: 81 + value: { + pr_id: 81 + papers: { + paper_id: "machine-theory-of-mind" + title: "Machine Theory of Mind" + arxiv_id: "1802.07740" + abstract: "Theory of mind (ToM; Premack & Woodruff, 1978) broadly refers to humans'\nability to represent the mental states of others, including their desires,\nbeliefs, and intentions. We propose to train a machine to build such models\ntoo. We design a Theory of Mind neural network -- a ToMnet -- which uses\nmeta-learning to build models of the agents it encounters, from observations of\ntheir behaviour alone. Through this process, it acquires a strong prior model\nfor agents' behaviour, as well as the ability to bootstrap to richer\npredictions about agents' characteristics and mental states using only a small\nnumber of behavioural observations. We apply the ToMnet to agents behaving in\nsimple gridworld environments, showing that it learns to model random,\nalgorithmic, and deep reinforcement learning agents from varied populations,\nand that it passes classic ToM tasks such as the \"Sally-Anne\" test (Wimmer &\nPerner, 1983; Baron-Cohen et al., 1985) of recognising that others can hold\nfalse beliefs about the world. We argue that this system -- which autonomously\nlearns how to model other agents in its world -- is an important step forward\nfor developing multi-agent AI systems, for building intermediating technology\nfor machine-human interaction, and for advancing the progress on interpretable\nAI." + pub_date: { + seconds: 1519171200 + } + authors: "Neil C. Rabinowitz" + authors: "Frank Perbet" + authors: "H. Francis Song" + authors: "Chiyuan Zhang" + authors: "S. M. Ali Eslami" + authors: "Matthew Botvinick" + } + video: {} + } +} +pr_id_to_video: { + key: 82 + value: { + pr_id: 82 + papers: { + paper_id: "supervised-speech-separation-based-on-deep" + title: "Supervised Speech Separation Based on Deep Learning: An Overview" + arxiv_id: "1708.07524" + abstract: "Speech separation is the task of separating target speech from background\ninterference. Traditionally, speech separation is studied as a signal\nprocessing problem. A more recent approach formulates speech separation as a\nsupervised learning problem, where the discriminative patterns of speech,\nspeakers, and background noise are learned from training data. 
Over the past\ndecade, many supervised separation algorithms have been put forward. In\nparticular, the recent introduction of deep learning to supervised speech\nseparation has dramatically accelerated progress and boosted separation\nperformance. This article provides a comprehensive overview of the research on\ndeep learning based supervised speech separation in the last several years. We\nfirst introduce the background of speech separation and the formulation of\nsupervised separation. Then we discuss three main components of supervised\nseparation: learning machines, training targets, and acoustic features. Much of\nthe overview is on separation algorithms where we review monaural methods,\nincluding speech enhancement (speech-nonspeech separation), speaker separation\n(multi-talker separation), and speech dereverberation, as well as\nmulti-microphone techniques. The important issue of generalization, unique to\nsupervised learning, is discussed. This overview provides a historical\nperspective on how advances are made. In addition, we discuss a number of\nconceptual issues, including what constitutes the target source." + pub_date: { + seconds: 1503532800 + } + authors: "DeLiang Wang" + authors: "Jitong Chen" + } + video: { + video_id: "OgNSFKeHy8k" + video_title: "PR-082: Introduction to Speech Separation" + number_of_likes: 14 + number_of_views: 1210 + published_date: { + seconds: 1524410583 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 83 + value: { + pr_id: 83 + papers: { + paper_id: "non-local-neural-networks" + title: "Non-local Neural Networks" + arxiv_id: "1711.07971" + abstract: "Both convolutional and recurrent operations are building blocks that process\none local neighborhood at a time. In this paper, we present non-local\noperations as a generic family of building blocks for capturing long-range\ndependencies. Inspired by the classical non-local means method in computer\nvision, our non-local operation computes the response at a position as a\nweighted sum of the features at all positions. This building block can be\nplugged into many computer vision architectures. On the task of video\nclassification, even without any bells and whistles, our non-local models can\ncompete or outperform current competition winners on both Kinetics and Charades\ndatasets. In static image recognition, our non-local models improve object\ndetection/segmentation and pose estimation on the COCO suite of tasks. Code is\navailable at https://github.com/facebookresearch/video-nonlocal-net ." 
+ pub_date: { + seconds: 1511222400 + } + authors: "Xiaolong Wang" + authors: "Ross Girshick" + authors: "Abhinav Gupta" + authors: "Kaiming He" + repositories: { + url: "https://github.com/open-mmlab/mmaction2" + framework: FRAMEWORK_PYTORCH + number_of_stars: 939 + description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" + } + repositories: { + url: "https://github.com/jordiae/DeepLearning-MAI" + framework: FRAMEWORK_PYTORCH + description: "Code for the Deep Learning course (Master in Artificial Intelligence at UPC)" + } + repositories: { + url: "https://github.com/LRacoci/permutation-graphml" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/rijuldhir/TSM" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/JiaPeng1234/MRI-Segmentation-Transformer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 + } + repositories: { + is_official: true + url: "https://github.com/facebookresearch/video-nonlocal-net" + framework: FRAMEWORK_OTHERS + number_of_stars: 1788 + description: "Non-local Neural Networks for Video Classification" + } + repositories: { + url: "https://github.com/jiajunhua/facebookresearch-Detectron" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/huyz1117/Non_Local_Net_TensorFlow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "TensorFlow implementation of Non-local Neural Network " + } + repositories: { + url: "https://github.com/facebookresearch/detectron" + framework: FRAMEWORK_PYTORCH + number_of_stars: 24458 + description: "FAIR's research platform for object detection research, implementing popular algorithms like Mask R-CNN and RetinaNet." + } + repositories: { + url: "https://github.com/seominseok0429/inception-I3D-NON-LOCAL" + framework: FRAMEWORK_PYTORCH + number_of_stars: 13 + description: "Inception-I3D, Non Local finetune, hmdb51_flow" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Residual Block" + full_name: "Residual Block" + description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. 
To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "Non-Local Operation" + full_name: "Non-Local Operation" + description: "A **Non-Local Operation** is a component for capturing long-range dependencies with deep neural networks. It is a generalization of the classical non-local mean operation in computer vision. Intuitively a non-local operation computes the response at a position as a weighted sum of the features at all positions in the input feature maps. The set of positions can be in space, time, or spacetime, implying that these operations are applicable for image, sequence, and video problems.\r\n\r\nFollowing the non-local mean operation, a generic non-local operation for deep neural networks is defined as:\r\n\r\n$$ \\mathbb{y}\\_{i} = \\frac{1}{\\mathcal{C}\\left(\\mathbb{x}\\right)}\\sum\\_{\\forall{j}}f\\left(\\mathbb{x}\\_{i}, \\mathbb{x}\\_{j}\\right)g\\left(\\mathbb{x}\\_{j}\\right) $$\r\n\r\nHere $i$ is the index of an output position (in space, time, or spacetime) whose response is to be computed and $j$ is the index that enumerates all possible positions. x is the input signal (image, sequence, video; often their features) and $y$ is the output signal of the same size as $x$. A pairwise function $f$ computes a scalar (representing relationship such as affinity) between $i$ and all $j$. The unary function $g$ computes a representation of the input signal at the position $j$. The\r\nresponse is normalized by a factor $C\\left(x\\right)$.\r\n\r\nThe non-local behavior is due to the fact that all positions ($\\forall{j}$) are considered in the operation. As a comparison, a convolutional operation sums up the weighted input in a local neighborhood (e.g., $i − 1 \\leq j \\leq i + 1$ in a 1D case with kernel size 3), and a recurrent operation at time $i$ is often based only on the current and the latest time steps (e.g., $j = i$ or $i − 1$).\r\n\r\nThe non-local operation is also different from a fully-connected (fc) layer. The equation above computes responses based on relationships between different locations, whereas fc uses learned weights. In other words, the relationship between $x\\_{j}$ and $x\\_{i}$ is not a function of the input data in fc, unlike in nonlocal layers. Furthermore, the formulation in the equation above supports inputs of variable sizes, and maintains the corresponding size in the output. 
On the contrary, an fc layer requires a fixed-size input/output and loses positional correspondence (e.g., that from $x\\_{i}$ to $y\\_{i}$ at the position $i$).\r\n\r\nA non-local operation is a flexible building block and can be easily used together with convolutional/recurrent layers. It can be added into the earlier part of deep neural networks, unlike fc layers that are often used in the end. This allows us to build a richer hierarchy that combines both non-local and local information.\r\n\r\nIn terms of parameterisation, we usually parameterise $g$ as a linear embedding of the form $g\\left(x\\_{j}\\right) = W\\_{g}\\mathbb{x}\\_{j}$ , where $W\\_{g}$ is a weight matrix to be learned. This is implemented as, e.g., 1×1 convolution in space or 1×1×1 convolution in spacetime. For $f$ we use an affinity function, a list of which can be found [here](https://paperswithcode.com/methods/category/affinity-functions)." + } + methods: { + name: "Embedded Dot Product Affinity" + full_name: "Embedded Dot Product Affinity" + description: "**Embedded Dot Product Affinity** is a type of affinity or self-similarity function between two points $\\mathbb{x\\_{i}}$ and $\\mathbb{x\\_{j}}$ that uses a dot product function in an embedding space:\r\n\r\n$$ f\\left(\\mathbb{x\\_{i}}, \\mathbb{x\\_{j}}\\right) = \\theta\\left(\\mathbb{x\\_{i}}\\right)^{T}\\phi\\left(\\mathbb{x\\_{j}}\\right) $$\r\n\r\nHere $\\theta\\left(x\\_{i}\\right) = W\\_{θ}x\\_{i}$ and $\\phi\\left(x\\_{j}\\right) = W\\_{φ}x\\_{j}$ are two embeddings.\r\n\r\nThe main difference between the dot product and embedded Gaussian affinity functions is the presence of softmax, which plays the role of an activation function." + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ResNeXt" + full_name: "ResNeXt" + description: "A **ResNeXt** repeats a building block that aggregates a set of transformations with the same topology. 
Compared to a [ResNet](https://paperswithcode.com/method/resnet), it exposes a new dimension, *cardinality* (the size of the set of transformations) $C$, as an essential factor in addition to the dimensions of depth and width. \r\n\r\nFormally, a set of aggregated transformations can be represented as: $\\mathcal{F}(x)=\\sum_{i=1}^{C}\\mathcal{T}_i(x)$, where $\\mathcal{T}_i(x)$ can be an arbitrary function. Analogous to a simple neuron, $\\mathcal{T}_i$ should project $x$ into an (optionally low-dimensional) embedding and then transform it." + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." + } + } + video: { + video_id: "ZM153wo3baA" + video_title: "PR-083: Non-local Neural Networks" + number_of_likes: 45 + number_of_views: 4909 + published_date: { + seconds: 1525008094 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 84 + value: { + pr_id: 84 + papers: { + paper_id: "megdet-a-large-mini-batch-object-detector" + title: "MegDet: A Large Mini-Batch Object Detector" + arxiv_id: "1711.07240" + abstract: "The improvements in recent CNN-based object detection works, from R-CNN [11],\nFast/Faster R-CNN [10, 31] to recent Mask R-CNN [14] and RetinaNet [24], mainly\ncome from new network, new framework, or novel loss design. But mini-batch\nsize, a key factor in the training, has not been well studied. In this paper,\nwe propose a Large MiniBatch Object Detector (MegDet) to enable the training\nwith much larger mini-batch size than before (e.g. from 16 to 256), so that we\ncan effectively utilize multiple GPUs (up to 128 in our experiments) to\nsignificantly shorten the training time. Technically, we suggest a learning\nrate policy and Cross-GPU Batch Normalization, which together allow us to\nsuccessfully train a large mini-batch detector in much less time (e.g., from 33\nhours to 4 hours), and achieve even better accuracy. The MegDet is the backbone\nof our submission (mmAP 52.5%) to COCO 2017 Challenge, where we won the 1st\nplace of Detection task." 
+ pub_date: { + seconds: 1511136000 + } + authors: "Chao Peng" + authors: "Tete Xiao" + authors: "Zeming Li" + authors: "Yuning Jiang" + authors: "Xiangyu Zhang" + authors: "Kai Jia" + authors: "Gang Yu" + authors: "Jian Sun" + repositories: { + url: "https://github.com/CSAILVision/semantic-segmentation-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3934 + description: "Pytorch implementation for Semantic Segmentation/Scene Parsing on MIT ADE20K dataset" + } + repositories: { + url: "https://github.com/keyEpoch/semen_seg-kd" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/Louis24/Segmentation" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/chenyilun95/tf-cpn" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 760 + description: "Cascaded Pyramid Network for Multi-Person Pose Estimation (CVPR 2018)" + } + repositories: { + url: "https://github.com/vacancy/Synchronized-BatchNorm-PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1301 + description: "Synchronized Batch Normalization implementation in PyTorch." + } + repositories: { + url: "https://github.com/chrisway613/Synchronized-BatchNormalization" + framework: FRAMEWORK_PYTORCH + number_of_stars: 10 + description: "Multi-Gpus Synchronized Batch Normalization implementation in PyTorch" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "RetinaNet" + full_name: "RetinaNet" + description: "**RetinaNet** is a one-stage object detection model that utilizes a focal loss function to address class imbalance during training. Focal loss applies a modulating term to the cross entropy loss in order to focus learning on hard negative examples. RetinaNet is a single, unified network composed of a *backbone* network and two task-specific *subnetworks*. The backbone is responsible for computing a convolutional feature map over an entire input image and is an off-the-shelf convolutional network. The first subnet performs convolutional object classification on the backbone's output; the second subnet performs convolutional bounding box regression. The two subnetworks feature a simple design that the authors propose specifically for one-stage, dense detection. \r\n\r\nWe can see the motivation for focal loss by comparing with two-stage object detectors. Here class imbalance is addressed by a two-stage cascade and sampling heuristics. The proposal stage (e.g., [Selective Search](https://paperswithcode.com/method/selective-search), [EdgeBoxes](https://paperswithcode.com/method/edgeboxes), [DeepMask](https://paperswithcode.com/method/deepmask), [RPN](https://paperswithcode.com/method/rpn)) rapidly narrows down the number of candidate object locations to a small number (e.g., 1-2k), filtering out most background samples.
In the second classification stage, sampling heuristics, such as a fixed foreground-to-background ratio, or online hard example mining ([OHEM](https://paperswithcode.com/method/ohem)), are performed to maintain a\r\nmanageable balance between foreground and background.\r\n\r\nIn contrast, a one-stage detector must process a much larger set of candidate object locations regularly sampled across an image. To tackle this, RetinaNet uses a focal loss function, a dynamically scaled cross entropy loss, where the scaling factor decays to zero as confidence in the correct class increases. Intuitively, this scaling factor can automatically down-weight the contribution of easy examples during training and rapidly focus the model on hard examples. \r\n\r\nFormally, the Focal Loss adds a factor $(1 - p\\_{t})^\\gamma$ to the standard cross entropy criterion. Setting $\\gamma>0$ reduces the relative loss for well-classified examples ($p\\_{t}>.5$), putting more focus on hard, misclassified examples. Here there is tunable *focusing* parameter $\\gamma \\ge 0$. \r\n\r\n$$ {\\text{FL}(p\\_{t}) = - (1 - p\\_{t})^\\gamma \\log\\left(p\\_{t}\\right)} $$" + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Mask R-CNN" + full_name: "Mask R-CNN" + description: "**Mask R-CNN** extends [Faster R-CNN](http://paperswithcode.com/method/faster-r-cnn) to solve instance segmentation tasks. It achieves this by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. In principle, Mask R-CNN is an intuitive extension of Faster R-CNN, but constructing the mask branch properly is critical for good results. \r\n\r\nMost importantly, Faster R-CNN was not designed for pixel-to-pixel alignment between network inputs and outputs. This is evident in how [RoIPool](http://paperswithcode.com/method/roi-pooling), the *de facto* core operation for attending to instances, performs coarse spatial quantization for feature extraction. To fix the misalignment, Mask R-CNN utilises a simple, quantization-free layer, called [RoIAlign](http://paperswithcode.com/method/roi-align), that faithfully preserves exact spatial locations. 
\r\n\r\nSecondly, Mask R-CNN *decouples* mask and class prediction: it predicts a binary mask for each class independently, without competition among classes, and relies on the network's RoI classification branch to predict the category. In contrast, an [FCN](http://paperswithcode.com/method/fcn) usually perform per-pixel multi-class categorization, which couples segmentation and classification." + } + methods: { + name: "FPN" + full_name: "Feature Pyramid Network" + description: "A **Feature Pyramid Network**, or **FPN**, is a feature extractor that takes a single-scale image of an arbitrary size as input, and outputs proportionally sized feature maps at multiple levels, in a fully convolutional fashion. This process is independent of the backbone convolutional architectures. It therefore acts as a generic solution for building feature pyramids inside deep convolutional networks to be used in tasks like object detection.\r\n\r\nThe construction of the pyramid involves a bottom-up pathway and a top-down pathway.\r\n\r\nThe bottom-up pathway is the feedforward computation of the backbone ConvNet, which computes a feature hierarchy consisting of feature maps at several scales with a scaling step of 2. For the feature\r\npyramid, one pyramid level is defined for each stage. The output of the last layer of each stage is used as a reference set of feature maps. For [ResNets](https://paperswithcode.com/method/resnet) we use the feature activations output by each stage’s last residual block. \r\n\r\nThe top-down pathway hallucinates higher resolution features by upsampling spatially coarser, but semantically stronger, feature maps from higher pyramid levels. These features are then enhanced with features from the bottom-up pathway via lateral connections. Each lateral connection merges feature maps of the same spatial size from the bottom-up pathway and the top-down pathway. The bottom-up feature map is of lower-level semantics, but its activations are more accurately localized as it was subsampled fewer times." + } + methods: { + name: "RoIAlign" + full_name: "RoIAlign" + description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. 
It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \mid{x}) = \frac{e^{x^{T}w_{j}}}{\sum^{K}_{k=1}e^{x^{T}w_{k}}} $$" + } + methods: { + name: "Focal Loss" + full_name: "Focal Loss" + description: "A **Focal Loss** function addresses class imbalance during training in tasks like object detection. Focal loss applies a modulating term to the cross entropy loss in order to focus learning on hard negative examples. It is a dynamically scaled cross entropy loss, where the scaling factor decays to zero as confidence in the correct class increases. Intuitively, this scaling factor can automatically down-weight the contribution of easy examples during training and rapidly focus the model on hard examples. \r\n\r\nFormally, the Focal Loss adds a factor $(1 - p\_{t})^\gamma$ to the standard cross entropy criterion. Setting $\gamma>0$ reduces the relative loss for well-classified examples ($p\_{t}>.5$), putting more focus on hard, misclassified examples. Here there is a tunable *focusing* parameter $\gamma \ge 0$. \r\n\r\n$$ {\text{FL}(p\_{t}) = - (1 - p\_{t})^\gamma \log\left(p\_{t}\right)} $$" + } + } + video: { + video_id: "nkYFEoKQBH0" + video_title: "PR-084 MegDet: A Large Mini-Batch Object Detector (CVPR2018)" + number_of_likes: 3 + number_of_views: 1222 + published_date: { + seconds: 1525663256 + } + uploader: "Taegyun Jeon" + } + } +} +pr_id_to_video: { + key: 85 + value: { + pr_id: 85 + papers: { + paper_id: "in-datacenter-performance-analysis-of-a" + title: "In-Datacenter Performance Analysis of a Tensor Processing Unit" + arxiv_id: "1704.04760" + abstract: "Many architects believe that major improvements in cost-energy-performance\nmust now come from domain-specific hardware. This paper evaluates a custom\nASIC---called a Tensor Processing Unit (TPU)---deployed in datacenters since\n2015 that accelerates the inference phase of neural networks (NN). The heart of\nthe TPU is a 65,536 8-bit MAC matrix multiply unit that offers a peak\nthroughput of 92 TeraOps/second (TOPS) and a large (28 MiB) software-managed\non-chip memory. The TPU's deterministic execution model is a better match to\nthe 99th-percentile response-time requirement of our NN applications than are\nthe time-varying optimizations of CPUs and GPUs (caches, out-of-order\nexecution, multithreading, multiprocessing, prefetching, ...) that help average\nthroughput more than guaranteed latency. The lack of such features helps\nexplain why, despite having myriad MACs and a big memory, the TPU is relatively\nsmall and low power. We compare the TPU to a server-class Intel Haswell CPU and\nan Nvidia K80 GPU, which are contemporaries deployed in the same datacenters.\nOur workload, written in the high-level TensorFlow framework, uses production\nNN applications (MLPs, CNNs, and LSTMs) that represent 95% of our datacenters'\nNN inference demand. Despite low utilization for some applications, the TPU is\non average about 15X - 30X faster than its contemporary GPU or CPU, with\nTOPS/Watt about 30X - 80X higher. Moreover, using the GPU's GDDR5 memory in the\nTPU would triple achieved TOPS and raise TOPS/Watt to nearly 70X the GPU and\n200X the CPU." + pub_date: { + seconds: 1492300800 + } + authors: "Norman P.
Jouppi" + authors: "Cliff Young" + authors: "Nishant Patil" + authors: "David Patterson" + authors: "Gaurav Agrawal" + authors: "Raminder Bajwa" + authors: "Sarah Bates" + authors: "Suresh Bhatia" + authors: "Nan Boden" + authors: "Al Borchers" + authors: "Rick Boyle" + authors: "Pierre-luc Cantin" + authors: "Clifford Chao" + authors: "Chris Clark" + authors: "Jeremy Coriell" + authors: "Mike Daley" + authors: "Matt Dau" + authors: "Jeffrey Dean" + authors: "Ben Gelb" + authors: "Tara Vazir Ghaemmaghami" + authors: "Rajendra Gottipati" + authors: "William Gulland" + authors: "Robert Hagmann" + authors: "C. Richard Ho" + authors: "Doug Hogberg" + authors: "John Hu" + authors: "Robert Hundt" + authors: "Dan Hurt" + authors: "Julian Ibarz" + authors: "Aaron Jaffey" + authors: "Alek Jaworski" + authors: "Alexander Kaplan" + authors: "Harshit Khaitan" + authors: "Andy Koch" + authors: "Naveen Kumar" + authors: "Steve Lacy" + authors: "James Laudon" + authors: "James Law" + authors: "Diemthu Le" + authors: "Chris Leary" + authors: "Zhuyuan Liu" + authors: "Kyle Lucke" + authors: "Alan Lundin" + authors: "Gordon MacKean" + authors: "Adriana Maggiore" + authors: "Maire Mahony" + authors: "Kieran Miller" + authors: "Rahul Nagarajan" + authors: "Ravi Narayanaswami" + authors: "Ray Ni" + authors: "Kathy Nix" + authors: "Thomas Norrie" + authors: "Mark Omernick" + authors: "Narayana Penukonda" + authors: "Andy Phelps" + authors: "Jonathan Ross" + authors: "Matt Ross" + authors: "Amir Salek" + authors: "Emad Samadiani" + authors: "Chris Severn" + authors: "Gregory Sizikov" + authors: "Matthew Snelham" + authors: "Jed Souter" + authors: "Dan Steinberg" + authors: "Andy Swing" + authors: "Mercedes Tan" + authors: "Gregory Thorson" + authors: "Bo Tian" + authors: "Horia Toma" + authors: "Erick Tuttle" + authors: "Vijay Vasudevan" + authors: "Richard Walter" + authors: "Walter Wang" + authors: "Eric Wilcox" + authors: "Doe Hyun Yoon" + } + video: { + video_id: "7WhWkhFAIO4" + video_title: "PR-085: In-Datacenter Performance Analysis of a Tensor Processing Unit" + number_of_likes: 21 + number_of_views: 1585 + published_date: { + seconds: 1526140508 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 86 + value: { + pr_id: 86 + papers: { + paper_id: "on-the-power-of-curriculum-learning-in" + title: "On The Power of Curriculum Learning in Training Deep Networks" + arxiv_id: "1904.03626" + abstract: "Training neural networks is traditionally done by providing a sequence of random mini-batches sampled uniformly from the entire training data. In this work, we analyze the effect of curriculum learning, which involves the non-uniform sampling of mini-batches, on the training of deep networks, and specifically CNNs trained for image recognition. To employ curriculum learning, the training algorithm must resolve 2 problems: (i) sort the training examples by difficulty; (ii) compute a series of mini-batches that exhibit an increasing level of difficulty. We address challenge (i) using two methods: transfer learning from some competitive ``teacher\" network, and bootstrapping. In our empirical evaluation, both methods show similar benefits in terms of increased learning speed and improved final performance on test data. We address challenge (ii) by investigating different pacing functions to guide the sampling. The empirical investigation includes a variety of network architectures, using images from CIFAR-10, CIFAR-100 and subsets of ImageNet. 
We conclude with a novel theoretical analysis of curriculum learning, where we show how it effectively modifies the optimization landscape. We then define the concept of an ideal curriculum, and show that under mild conditions it does not change the corresponding global minimum of the optimization function." + pub_date: { + seconds: 1554595200 + } + authors: "Guy Hacohen" + authors: "Daphna Weinshall" + repositories: { + url: "https://github.com/josephch405/curriculum-nmt" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + } + repositories: { + is_official: true + url: "https://github.com/GuyHacohen/curriculum_learning" + framework: FRAMEWORK_OTHERS + number_of_stars: 46 + description: "Code implementing the experiments described in the paper \"On The Power of Curriculum Learning in Training Deep Networks\" by Hacohen & Weinshall (ICML 2019)" + } + } + video: { + video_id: "fQtuWEuwXrA" + video_title: "PR-086: Curriculum Learning" + number_of_likes: 5 + number_of_views: 1367 + published_date: { + seconds: 1526221428 + } + uploader: "차준범" + } + } +} +pr_id_to_video: { + key: 87 + value: { + pr_id: 87 + papers: { + paper_id: "spectral-normalization-for-generative" + title: "Spectral Normalization for Generative Adversarial Networks" + arxiv_id: "1802.05957" + abstract: "One of the challenges in the study of generative adversarial networks is the\ninstability of its training. In this paper, we propose a novel weight\nnormalization technique called spectral normalization to stabilize the training\nof the discriminator. Our new normalization technique is computationally light\nand easy to incorporate into existing implementations. We tested the efficacy\nof spectral normalization on CIFAR10, STL-10, and ILSVRC2012 dataset, and we\nexperimentally confirmed that spectrally normalized GANs (SN-GANs) is capable\nof generating images of better or equal quality relative to the previous\ntraining stabilization techniques." + pub_date: { + seconds: 1518739200 + } + authors: "Takeru Miyato" + authors: "Toshiki Kataoka" + authors: "Masanori Koyama" + authors: "Yuichi Yoshida" + repositories: { + url: "https://github.com/karoly-hars/GAN_image_colorizing" + framework: FRAMEWORK_PYTORCH + number_of_stars: 10 + description: "Image colorization with generative adversarial networks on the CIFAR10 dataset." 
+ } + repositories: { + url: "https://github.com/ncuzzy/mygan" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/IShengFang/SpectralNormalizationKeras" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 105 + description: "Spectral Normalization for Keras Dense and Convolution Layers" + } + repositories: { + url: "https://github.com/zhusiling/SAGAN" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/qiaolinhan/ws-preprocess" + framework: FRAMEWORK_OTHERS + description: "This is image restoration for UAV based wildfire segmentation because it will always meet some disturbance, noise or other serious situation " + } + repositories: { + is_official: true + url: "https://github.com/pfnet-research/sngan_projection" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 950 + description: "GANs with spectral normalization and projection discriminator" + } + repositories: { + url: "https://github.com/kklemon/bgan-pytorch" + framework: FRAMEWORK_PYTORCH + description: "PyTorch implementation of Boundary Seeking GAN for discrete data" + } + repositories: { + url: "https://github.com/guy-oren/DIRT-OST" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/Bingwen-Hu/DRIT" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/hinofafa/Self-Attention-HearthStone-GAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "This repository provides a PyTorch implementation of SAGAN cited by heykeetae/Self-Attention-GAN. This repository provide an efficient method to generate large resolution images and attention weights visualisation using tensorboard platform. Tensorboard is a robust platform to monitor generated images and learning weights in computer vision learning experiment." + } + methods: { + name: "GAN Hinge Loss" + full_name: "GAN Hinge Loss" + description: "The **GAN Hinge Loss** is a hinge loss based loss function for [generative adversarial networks](https://paperswithcode.com/methods/category/generative-adversarial-networks):\r\n\r\n$$ L\\_{D} = -\\mathbb{E}\\_{\\left(x, y\\right)\\sim{p}\\_{data}}\\left[\\min\\left(0, -1 + D\\left(x, y\\right)\\right)\\right] -\\mathbb{E}\\_{z\\sim{p\\_{z}}, y\\sim{p\\_{data}}}\\left[\\min\\left(0, -1 - D\\left(G\\left(z\\right), y\\right)\\right)\\right] $$\r\n\r\n$$ L\\_{G} = -\\mathbb{E}\\_{z\\sim{p\\_{z}}, y\\sim{p\\_{data}}}D\\left(G\\left(z\\right), y\\right) $$" + } + methods: { + name: "Spectral Normalization" + full_name: "Spectral Normalization" + description: "**Spectral Normalization** is a normalization technique used for generative adversarial networks, used to stabilize training of the discriminator. Spectral normalization has the convenient property that the Lipschitz constant is the only hyper-parameter to be tuned.\r\n\r\nIt controls the Lipschitz constant of the discriminator $f$ by constraining the spectral norm of each layer $g : \\textbf{h}\\_{in} \\rightarrow \\textbf{h}_{out}$. 
The Lipschitz norm $\\Vert{g}\\Vert\\_{\\text{Lip}}$ is equal to $\\sup\\_{\\textbf{h}}\\sigma\\left(\\nabla{g}\\left(\\textbf{h}\\right)\\right)$, where $\\sigma\\left(a\\right)$ is the spectral norm of the matrix $A$ ($L\\_{2}$ matrix norm of $A$):\r\n\r\n$$ \\sigma\\left(a\\right) = \\max\\_{\\textbf{h}:\\textbf{h}\\neq{0}}\\frac{\\Vert{A\\textbf{h}}\\Vert\\_{2}}{\\Vert\\textbf{h}\\Vert\\_{2}} = \\max\\_{\\Vert\\textbf{h}\\Vert\\_{2}\\leq{1}}{\\Vert{A\\textbf{h}}\\Vert\\_{2}} $$\r\n\r\nwhich is equivalent to the largest singular value of $A$. Therefore for a linear layer $g\\left(\\textbf{h}\\right) = W\\textbf{h}$ the norm is given by $\\Vert{g}\\Vert\\_{\\text{Lip}} = \\sup\\_{\\textbf{h}}\\sigma\\left(\\nabla{g}\\left(\\textbf{h}\\right)\\right) = \\sup\\_{\\textbf{h}}\\sigma\\left(W\\right) = \\sigma\\left(W\\right) $. Spectral normalization normalizes the spectral norm of the weight matrix $W$ so it satisfies the Lipschitz constraint $\\sigma\\left(W\\right) = 1$:\r\n\r\n$$ \\bar{W}\\_{\\text{SN}}\\left(W\\right) = W / \\sigma\\left(W\\right) $$" + } + methods: { + name: "SNGAN" + full_name: "Spectrally Normalised GAN" + description: "**SNGAN**, or **Spectrally Normalised GAN**, is a type of generative adversarial network that uses spectral normalization, a type of weight normalization, to stabilise the training of the discriminator." + } + methods: { + name: "Residual Block" + full_name: "Residual Block" + description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." + } + methods: { + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." 
+ } + methods: { + name: "Bottleneck Residual Block" + full_name: "Bottleneck Residual Block" + description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + } + methods: { + name: "Leaky ReLU" + full_name: "Leaky ReLU" + description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we we may suffer from sparse gradients, for example training generative adversarial networks." + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." 
+ } + } + video: { + video_id: "iXSYqohGQhM" + video_title: "PR-087: Spectral Normalization for Generative Adversarial Networks" + number_of_likes: 44 + number_of_views: 4666 + published_date: { + seconds: 1526221916 + } + uploader: "Jaejun Yoo" + } + } +} +pr_id_to_video: { + key: 88 + value: { + pr_id: 88 + papers: { + paper_id: "deep-variational-bayes-filters-unsupervised" + title: "Deep Variational Bayes Filters: Unsupervised Learning of State Space Models from Raw Data" + arxiv_id: "1605.06432" + abstract: "We introduce Deep Variational Bayes Filters (DVBF), a new method for\nunsupervised learning and identification of latent Markovian state space\nmodels. Leveraging recent advances in Stochastic Gradient Variational Bayes,\nDVBF can overcome intractable inference distributions via variational\ninference. Thus, it can handle highly nonlinear input data with temporal and\nspatial dependencies such as image sequences without domain knowledge. Our\nexperiments show that enabling backpropagation through transitions enforces\nstate space assumptions and significantly improves information content of the\nlatent embedding. This also enables realistic long-term prediction." + pub_date: { + seconds: 1463702400 + } + authors: "Maximilian Karl" + authors: "Maximilian Soelch" + authors: "Justin Bayer" + authors: "Patrick van der Smagt" + repositories: { + url: "https://github.com/baggepinnen/DeepFilters.jl" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Fiiiiiiiiiiiiiilters" + } + repositories: { + is_official: true + url: "https://github.com/baggepinnen/DVBF.jl" + framework: FRAMEWORK_OTHERS + number_of_stars: 8 + description: "Deep variational Bayes filter in julia using Flux" + } + methods: { + name: "Stochastic Gradient Variational Bayes" + full_name: "Stochastic Gradient Variational Bayes" + } + } + video: { + video_id: "uM0rQtL6_AA" + video_title: "PR-088: Deep Variational Bayes Filters (2017)" + number_of_likes: 37 + number_of_views: 2792 + published_date: { + seconds: 1526901682 + } + uploader: "Terry TaeWoong Um" + } + } +} +pr_id_to_video: { + key: 89 + value: { + pr_id: 89 + papers: { + paper_id: "beyond-word-importance-contextual" + title: "Beyond Word Importance: Contextual Decomposition to Extract Interactions from LSTMs" + arxiv_id: "1801.05453" + abstract: "The driving force behind the recent success of LSTMs has been their ability\nto learn complex and non-linear relationships. Consequently, our inability to\ndescribe these relationships has led to LSTMs being characterized as black\nboxes. To this end, we introduce contextual decomposition (CD), an\ninterpretation algorithm for analysing individual predictions made by standard\nLSTMs, without any changes to the underlying model. By decomposing the output\nof a LSTM, CD captures the contributions of combinations of words or variables\nto the final prediction of an LSTM. On the task of sentiment analysis with the\nYelp and SST data sets, we show that CD is able to reliably identify words and\nphrases of contrasting sentiment, and how they are combined to yield the LSTM's\nfinal prediction. Using the phrase-level labels in SST, we also demonstrate\nthat CD is able to successfully extract positive and negative negations from an\nLSTM, something which has not previously been done." + pub_date: { + seconds: 1516060800 + } + authors: "W. James Murdoch" + authors: "Peter J. 
Liu" + authors: "Bin Yu" + repositories: { + is_official: true + url: "https://github.com/jamie-murdoch/ContextualDecomposition" + framework: FRAMEWORK_PYTORCH + number_of_stars: 53 + description: "Demo for method introduced in \"Beyond Word Importance: Contextual Decomposition to Extract Interactions from LSTMs\"" + } + repositories: { + url: "https://github.com/csinva/hierarchical-dnn-interpretations" + framework: FRAMEWORK_PYTORCH + number_of_stars: 92 + description: "Using / reproducing ACD from the paper \"Hierarchical interpretations for neural network predictions\" 🧠 (ICLR 2019)" + } + repositories: { + url: "https://github.com/suyash/ContextualDecomposition" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Contextual Decomposition Experiments" + } + methods: { + name: "Tanh Activation" + full_name: "Tanh Activation" + description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" + } + methods: { + name: "LSTM" + full_name: "Long Short-Term Memory" + description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + } + video: { + video_id: "5whGIpoLoq4" + video_title: "PR-089: Beyond Word Importance: Contextual Decomposition to Extract Interactions from LSTMs" + number_of_views: 440 + published_date: { + seconds: 1528641922 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 90 + value: { + pr_id: 90 + papers: { + paper_id: "representation-learning-by-learning-to-count" + title: "Representation Learning by Learning to Count" + arxiv_id: "1708.06734" + abstract: "We introduce a novel method for representation learning that uses an\nartificial supervision signal based on counting visual primitives. This\nsupervision signal is obtained from an equivariance relation, which does not\nrequire any manual annotation. We relate transformations of images to\ntransformations of the representations. 
More specifically, we look for the\nrepresentation that satisfies such relation rather than the transformations\nthat match a given representation. In this paper, we use two image\ntransformations in the context of counting: scaling and tiling. The first\ntransformation exploits the fact that the number of visual primitives should be\ninvariant to scale. The second transformation allows us to equate the total\nnumber of visual primitives in each tile to that in the whole image. These two\ntransformations are combined in one constraint and used to train a neural\nnetwork with a contrastive loss. The proposed task produces representations\nthat perform on par or exceed the state of the art in transfer learning\nbenchmarks." + pub_date: { + seconds: 1503360000 + } + authors: "Mehdi Noroozi" + authors: "Hamed Pirsiavash" + authors: "Paolo Favaro" + repositories: { + url: "https://github.com/gitlimlab/Representation-Learning-by-Learning-to-Count" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 107 + description: "A Tensorflow implementation of Representation Learning by Learning to Count" + } + } + video: { + video_id: "T7i_YKN2EY8" + video_title: "PR-090: Representation Learning by Learning to Count" + number_of_likes: 3 + number_of_views: 426 + published_date: { + seconds: 1529233262 + } + uploader: "Suh Kiho" + } + } +} +pr_id_to_video: { + key: 91 + value: { + pr_id: 91 + video: { + video_id: "v1GbxpKqH8Q" + video_title: "PR-091: A Universal Music Translation Network" + number_of_likes: 12 + number_of_views: 732 + published_date: { + seconds: 1529241765 + } + uploader: "Seungil Kim" + } + } +} +pr_id_to_video: { + key: 92 + value: { + pr_id: 92 + papers: { + paper_id: "a-hitchhikers-guide-on-distributed-training" + title: "A Hitchhiker's Guide On Distributed Training of Deep Neural Networks" + arxiv_id: "1810.11787" + abstract: "Deep learning has led to tremendous advancements in the field of Artificial\nIntelligence. One caveat however is the substantial amount of compute needed to\ntrain these deep learning models. Training a benchmark dataset like ImageNet on\na single machine with a modern GPU can take up to a week, distributing training\non multiple machines has been observed to drastically bring this time down.\nRecent work has brought down ImageNet training time to a time as low as 4\nminutes by using a cluster of 2048 GPUs. This paper surveys the various\nalgorithms and techniques used to distribute training and presents the current\nstate of the art for a modern distributed training framework. More\nspecifically, we explore the synchronous and asynchronous variants of\ndistributed Stochastic Gradient Descent, various All Reduce gradient\naggregation strategies and best practices for obtaining higher throughput and\nlower latency over a cluster such as mixed precision training, large batch\ntraining and gradient compression."
+ pub_date: { + seconds: 1540684800 + } + authors: "Karanbir Chahal" + authors: "Manraj Singh Grover" + authors: "Kuntal Dey" + } + video: { + video_id: "pAH3KhVnADE" + video_title: "PR-092: Distributed Training of Neural Networks" + number_of_likes: 4 + number_of_views: 746 + published_date: { + seconds: 1529243628 + } + uploader: "Jiyang Kang" + } + } +} +pr_id_to_video: { + key: 93 + value: { + pr_id: 93 + papers: { + paper_id: "playing-hard-exploration-games-by-watching" + title: "Playing hard exploration games by watching YouTube" + arxiv_id: "1805.11592" + abstract: "Deep reinforcement learning methods traditionally struggle with tasks where\nenvironment rewards are particularly sparse. One successful method of guiding\nexploration in these domains is to imitate trajectories provided by a human\ndemonstrator. However, these demonstrations are typically collected under\nartificial conditions, i.e. with access to the agent's exact environment setup\nand the demonstrator's action and reward trajectories. Here we propose a\ntwo-stage method that overcomes these limitations by relying on noisy,\nunaligned footage without access to such data. First, we learn to map unaligned\nvideos from multiple sources to a common representation using self-supervised\nobjectives constructed over both time and modality (i.e. vision and sound).\nSecond, we embed a single YouTube video in this representation to construct a\nreward function that encourages an agent to imitate human gameplay. This method\nof one-shot imitation allows our agent to convincingly exceed human-level\nperformance on the infamously hard exploration games Montezuma's Revenge,\nPitfall! and Private Eye for the first time, even if the agent is not presented\nwith any environment rewards." + pub_date: { + seconds: 1527552000 + } + authors: "Yusuf Aytar" + authors: "Tobias Pfaff" + authors: "David Budden" + authors: "Tom Le Paine" + authors: "Ziyu Wang" + authors: "Nando de Freitas" + repositories: { + url: "https://github.com/MaxSobolMark/HardRLWithYoutube" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 30 + description: "TensorFlow implementation of \"Playing hard exploration games by watching YouTube\"" + } + } + video: {} + } +} +pr_id_to_video: { + key: 94 + value: { + pr_id: 94 + papers: { + paper_id: "model-agnostic-meta-learning-for-fast" + title: "Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks" + arxiv_id: "1703.03400" + abstract: "We propose an algorithm for meta-learning that is model-agnostic, in the\nsense that it is compatible with any model trained with gradient descent and\napplicable to a variety of different learning problems, including\nclassification, regression, and reinforcement learning. The goal of\nmeta-learning is to train a model on a variety of learning tasks, such that it\ncan solve new learning tasks using only a small number of training samples. In\nour approach, the parameters of the model are explicitly trained such that a\nsmall number of gradient steps with a small amount of training data from a new\ntask will produce good generalization performance on that task. In effect, our\nmethod trains the model to be easy to fine-tune. We demonstrate that this\napproach leads to state-of-the-art performance on two few-shot image\nclassification benchmarks, produces good results on few-shot regression, and\naccelerates fine-tuning for policy gradient reinforcement learning with neural\nnetwork policies." 
+ pub_date: { + seconds: 1489017600 + } + authors: "Chelsea Finn" + authors: "Pieter Abbeel" + authors: "Sergey Levine" + repositories: { + url: "https://github.com/ThomasGoerttler/similarity-analysis-of-maml" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Code for \"Exploring the Similarity of Representations in Model-Agnostic Meta-Learning\" Forked from the code of the original paper \"Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks\"" + } + repositories: { + url: "https://github.com/mikehuisman/revisiting-learned-optimizers" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/antaradas94/MAML-waste-classification" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/GeorgeDUT/MetaRLSAS" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/SinghJasdeep/Projecting-Conflicting-Gradients" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/Tikquuss/meta_XLM" + framework: FRAMEWORK_PYTORCH + number_of_stars: 10 + description: "Cross-lingual Language Model (XLM) pretraining and Model-Agnostic Meta-Learning (MAML) for fast adaptation of deep networks" + } + repositories: { + url: "https://github.com/Zhiwei-Z/prompzzw" + framework: FRAMEWORK_TENSORFLOW + description: "Experiment sequential meta training using promp" + } + repositories: { + url: "https://github.com/sidney1505/arc_maml_transformer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/laiviet/maml" + framework: FRAMEWORK_PYTORCH + description: "Implementation of Model Agnostic Meta Learning" + } + repositories: { + url: "https://github.com/foolyc/Meta-SGD" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 65 + description: "Meta-SGD experiment on Omniglot classification compared with MAML" + } + methods: { + name: "TRPO" + full_name: "Trust Region Policy Optimization" + description: "**Trust Region Policy Optimization**, or **TRPO**, is a policy gradient method in reinforcement learning that avoids parameter updates that change the policy too much with a KL divergence constraint on the size of the policy update at each iteration.\r\n\r\nTake the case of off-policy reinforcement learning, where the policy $\\beta$ for collecting trajectories on rollout workers is different from the policy $\\pi$ to optimize for. 
The objective function in an off-policy model measures the total advantage over the state visitation distribution and actions, while the mismatch between the training data distribution and the true policy state distribution is compensated with an importance sampling estimator:\r\n\r\n$$ J\\left(\\theta\\right) = \\sum\\_{s\\in{S}}p^{\\pi\\_{\\theta\\_{old}}}\\sum\\_{a\\in\\mathcal{A}}\\left(\\pi\\_{\\theta}\\left(a\\mid{s}\\right)\\hat{A}\\_{\\theta\\_{old}}\\left(s, a\\right)\\right) $$\r\n\r\n$$ J\\left(\\theta\\right) = \\sum\\_{s\\in{S}}p^{\\pi\\_{\\theta\\_{old}}}\\sum\\_{a\\in\\mathcal{A}}\\left(\\beta\\left(a\\mid{s}\\right)\\frac{\\pi\\_{\\theta}\\left(a\\mid{s}\\right)}{\\beta\\left(a\\mid{s}\\right)}\\hat{A}\\_{\\theta\\_{old}}\\left(s, a\\right)\\right) $$\r\n\r\n$$ J\\left(\\theta\\right) = \\mathbb{E}\\_{s\\sim{p}^{\\pi\\_{\\theta\\_{old}}}, a\\sim{\\beta}} \\left(\\frac{\\pi\\_{\\theta}\\left(a\\mid{s}\\right)}{\\beta\\left(a\\mid{s}\\right)}\\hat{A}\\_{\\theta\\_{old}}\\left(s, a\\right)\\right)$$\r\n\r\nWhen training on policy, theoretically the policy for collecting data is same as the policy that we want to optimize. However, when rollout workers and optimizers are running in parallel asynchronously, the behavior policy can get stale. TRPO considers this subtle difference: It labels the behavior policy as $\\pi\\_{\\theta\\_{old}}\\left(a\\mid{s}\\right)$ and thus the objective function becomes:\r\n\r\n$$ J\\left(\\theta\\right) = \\mathbb{E}\\_{s\\sim{p}^{\\pi\\_{\\theta\\_{old}}}, a\\sim{\\pi\\_{\\theta\\_{old}}}} \\left(\\frac{\\pi\\_{\\theta}\\left(a\\mid{s}\\right)}{\\pi\\_{\\theta\\_{old}}\\left(a\\mid{s}\\right)}\\hat{A}\\_{\\theta\\_{old}}\\left(s, a\\right)\\right)$$\r\n\r\nTRPO aims to maximize the objective function $J\\left(\\theta\\right)$ subject to a trust region constraint which enforces the distance between old and new policies measured by KL-divergence to be small enough, within a parameter $\\delta$:\r\n\r\n$$ \\mathbb{E}\\_{s\\sim{p}^{\\pi\\_{\\theta\\_{old}}}} \\left[D\\_{KL}\\left(\\pi\\_{\\theta\\_{old}}\\left(.\\mid{s}\\right)\\mid\\mid\\pi\\_{\\theta}\\left(.\\mid{s}\\right)\\right)\\right] \\leq \\delta$$" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "MAML" + full_name: "Model-Agnostic Meta-Learning" + description: "**MAML**, or **Model-Agnostic Meta-Learning**, is a model and task-agnostic algorithm for meta-learning that trains a model’s parameters such that a small number of gradient updates will lead to fast learning on a new task.\r\n\r\nConsider a model represented by a parametrized function $f\\_{\\theta}$ with parameters $\\theta$. When adapting to a new task $\\mathcal{T}\\_{i}$, the model’s parameters $\\theta$ become $\\theta'\\_{i}$. With MAML, the updated parameter vector $\\theta'\\_{i}$ is computed using one or more gradient descent updates on task $\\mathcal{T}\\_{i}$. For example, when using one gradient update,\r\n\r\n$$ \\theta'\\_{i} = \\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right) $$\r\n\r\nThe step size $\\alpha$ may be fixed as a hyperparameter or metalearned. The model parameters are trained by optimizing for the performance of $f\\_{\\theta'\\_{i}}$ with respect to $\\theta$ across tasks sampled from $p\\left(\\mathcal{T}\\_{i}\\right)$. More concretely the meta-objective is as follows:\r\n\r\n$$ \\min\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right) = \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right)}\\right) $$\r\n\r\nNote that the meta-optimization is performed over the model parameters $\\theta$, whereas the objective is computed using the updated model parameters $\\theta'$. In effect MAML aims to optimize the model parameters such that one or a small number of gradient steps on a new task will produce maximally effective behavior on that task. The meta-optimization across tasks is performed via stochastic gradient descent (SGD), such that the model parameters $\\theta$ are updated as follows:\r\n\r\n$$ \\theta \\leftarrow \\theta - \\beta\\nabla\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right)$$\r\n\r\nwhere $\\beta$ is the meta step size." + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. 
Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Linear Layer" + full_name: "Linear Layer" + description: "A **Linear Layer** is a projection $\\mathbf{XW + b}$." + } + } + video: { + video_id: "fxJXXKZb-ik" + video_title: "PR-094: Model-Agnostic Meta-Learning for fast adaptation of deep networks" + number_of_likes: 58 + number_of_views: 5064 + published_date: { + seconds: 1529847830 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 95 + value: { + pr_id: 95 + papers: { + paper_id: "modularity-matters-learning-invariant" + title: "Modularity Matters: Learning Invariant Relational Reasoning Tasks" + arxiv_id: "1806.06765" + abstract: "We focus on two supervised visual reasoning tasks whose labels encode a\nsemantic relational rule between two or more objects in an image: the MNIST\nParity task and the colorized Pentomino task. The objects in the images undergo\nrandom translation, scaling, rotation and coloring transformations. Thus these\ntasks involve invariant relational reasoning. We report uneven performance of\nvarious deep CNN models on these two tasks. For the MNIST Parity task, we\nreport that the VGG19 model soundly outperforms a family of ResNet models.\nMoreover, the family of ResNet models exhibits a general sensitivity to random\ninitialization for the MNIST Parity task. For the colorized Pentomino task, now\nboth the VGG19 and ResNet models exhibit sluggish optimization and very poor\ntest generalization, hovering around 30% test error. The CNN we tested all\nlearn hierarchies of fully distributed features and thus encode the distributed\nrepresentation prior. We are motivated by a hypothesis from cognitive\nneuroscience which posits that the human visual cortex is modularized, and this\nallows the visual cortex to learn higher order invariances. To this end, we\nconsider a modularized variant of the ResNet model, referred to as a Residual\nMixture Network (ResMixNet) which employs a mixture-of-experts architecture to\ninterleave distributed representations with more specialized, modular\nrepresentations. We show that very shallow ResMixNets are capable of learning\neach of the two tasks well, attaining less than 2% and 1% test error on the\nMNIST Parity and the colorized Pentomino tasks respectively. Most importantly,\nthe ResMixNet models are extremely parameter efficient: generalizing better\nthan various non-modular CNNs that have over 10x the number of parameters.\nThese experimental results support the hypothesis that modularity is a robust\nprior for learning invariant relational reasoning." 
+ pub_date: { + seconds: 1529280000 + } + authors: "Jason Jo" + authors: "Vikas Verma" + authors: "Yoshua Bengio" + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Residual Block" + full_name: "Residual Block" + description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. 
Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Bottleneck Residual Block" + full_name: "Bottleneck Residual Block" + description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + } + } + video: { + video_id: "dAGI3mlOmfw" + video_title: "PR-095: Modularity Matters: Learning Invariant Relational Reasoning Tasks" + number_of_likes: 9 + number_of_views: 767 + published_date: { + seconds: 1532272031 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 96 + value: { + pr_id: 96 + papers: { + paper_id: "taskonomy-disentangling-task-transfer" + title: "Taskonomy: Disentangling Task Transfer Learning" + arxiv_id: "1804.08328" + abstract: "Do visual tasks have a relationship, or are they unrelated? For instance,\ncould having surface normals simplify estimating the depth of an image?\nIntuition answers these questions positively, implying existence of a structure\namong visual tasks. Knowing this structure has notable values; it is the\nconcept underlying transfer learning and provides a principled way for\nidentifying redundancies across tasks, e.g., to seamlessly reuse supervision\namong related tasks or solve many tasks in one system without piling up the\ncomplexity.\n We proposes a fully computational approach for modeling the structure of\nspace of visual tasks. This is done via finding (first and higher-order)\ntransfer learning dependencies across a dictionary of twenty six 2D, 2.5D, 3D,\nand semantic tasks in a latent space. The product is a computational taxonomic\nmap for task transfer learning. We study the consequences of this structure,\ne.g. 
nontrivial emerged relationships, and exploit them to reduce the demand\nfor labeled data. For example, we show that the total number of labeled\ndatapoints needed for solving a set of 10 tasks can be reduced by roughly 2/3\n(compared to training independently) while keeping the performance nearly the\nsame. We provide a set of tools for computing and probing this taxonomical\nstructure including a solver that users can employ to devise efficient\nsupervision policies for their use cases." + pub_date: { + seconds: 1524441600 + } + authors: "Amir Zamir" + authors: "Alexander Sax" + authors: "William Shen" + authors: "Leonidas Guibas" + authors: "Jitendra Malik" + authors: "Silvio Savarese" + repositories: { + is_official: true + url: "https://github.com/StanfordVL/taskonomy" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 728 + description: "Taskonomy: Disentangling Task Transfer Learning" + } + } + video: { + video_id: "WjUGrzBIDv0" + video_title: "PR-096: Taskonomy: Disentangling Task Transfer Learning" + number_of_likes: 10 + number_of_views: 1179 + published_date: { + seconds: 1530451567 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 97 + value: { + pr_id: 97 + papers: { + paper_id: "learning-representations-for-counterfactual" + title: "Learning Representations for Counterfactual Inference" + arxiv_id: "1605.03661" + abstract: "Observational studies are rising in importance due to the widespread\naccumulation of data in fields such as healthcare, education, employment and\necology. We consider the task of answering counterfactual questions such as,\n\"Would this patient have lower blood sugar had she received a different\nmedication?\". We propose a new algorithmic framework for counterfactual\ninference which brings together ideas from domain adaptation and representation\nlearning. In addition to a theoretical justification, we perform an empirical\ncomparison with previous approaches to causal inference from observational\ndata. Our deep learning algorithm significantly outperforms the previous\nstate-of-the-art." + pub_date: { + seconds: 1463011200 + } + authors: "Fredrik D. Johansson" + authors: "Uri Shalit" + authors: "David Sontag" + repositories: { + url: "https://github.com/lightlightdyy/Deep-Learning-and-Causal-Inference" + framework: FRAMEWORK_OTHERS + number_of_stars: 24 + } + methods: { + name: "Causal Inference" + full_name: "Causal Inference" + description: "Causal inference is the process of drawing a conclusion about a causal connection based on the conditions of the occurrence of an effect. The main difference between causal inference and inference of association is that the former analyzes the response of the effect variable when the cause is changed." + } + } + video: { + video_id: "l-pcG77Hr58" + video_title: "PR-097: Learning Representations for Counterfactual Inference" + number_of_views: 821 + published_date: { + seconds: 1531061236 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 98 + value: { + pr_id: 98 + papers: { + paper_id: "megadepth-learning-single-view-depth" + title: "MegaDepth: Learning Single-View Depth Prediction from Internet Photos" + arxiv_id: "1804.00607" + abstract: "Single-view depth prediction is a fundamental problem in computer vision.\nRecently, deep learning methods have led to significant progress, but such\nmethods are limited by the available training data. 
Current datasets based on\n3D sensors have key limitations, including indoor-only images (NYU), small\nnumbers of training examples (Make3D), and sparse sampling (KITTI). We propose\nto use multi-view Internet photo collections, a virtually unlimited data\nsource, to generate training data via modern structure-from-motion and\nmulti-view stereo (MVS) methods, and present a large depth dataset called\nMegaDepth based on this idea. Data derived from MVS comes with its own\nchallenges, including noise and unreconstructable objects. We address these\nchallenges with new data cleaning methods, as well as automatically augmenting\nour data with ordinal depth relations generated using semantic segmentation. We\nvalidate the use of large amounts of Internet data by showing that models\ntrained on MegaDepth exhibit strong generalization-not only to novel scenes,\nbut also to other diverse datasets including Make3D, KITTI, and DIW, even when\nno images from those datasets are seen during training." + pub_date: { + seconds: 1522627200 + } + authors: "Zhengqi Li" + authors: "Noah Snavely" + repositories: { + url: "https://github.com/zhengqili/MegaDepth" + framework: FRAMEWORK_PYTORCH + number_of_stars: 535 + description: "Code of single-view depth prediction algorithm on Internet Photos described in \"MegaDepth: Learning Single-View Depth Prediction from Internet Photos, Z. Li and N. Snavely, CVPR 2018\"." + } + } + video: { + video_id: "tGbMWAFMMBQ" + video_title: "PR-098: MegaDepth: Learning Single-View Depth Prediction from Internet Photos (CVPR2018)" + number_of_likes: 6 + number_of_views: 776 + published_date: { + seconds: 1531661811 + } + uploader: "이광희" + } + } +} +pr_id_to_video: { + key: 99 + value: { + pr_id: 99 + papers: { + paper_id: "mrnet-product2vec-a-multi-task-recurrent" + title: "MRNet-Product2Vec: A Multi-task Recurrent Neural Network for Product Embeddings" + arxiv_id: "1709.07534" + abstract: "E-commerce websites such as Amazon, Alibaba, Flipkart, and Walmart sell\nbillions of products. Machine learning (ML) algorithms involving products are\noften used to improve the customer experience and increase revenue, e.g.,\nproduct similarity, recommendation, and price estimation. The products are\nrequired to be represented as features before training an ML algorithm. In this\npaper, we propose an approach called MRNet-Product2Vec for creating generic\nembeddings of products within an e-commerce ecosystem. We learn a dense and\nlow-dimensional embedding where a diverse set of signals related to a product\nare explicitly injected into its representation. We train a Discriminative\nMulti-task Bidirectional Recurrent Neural Network (RNN), where the input is a\nproduct title fed through a Bidirectional RNN and at the output, product labels\ncorresponding to fifteen different tasks are predicted. The task set includes\nseveral intrinsic characteristics about a product such as price, weight, size,\ncolor, popularity, and material. We evaluate the proposed embedding\nquantitatively and qualitatively. We demonstrate that they are almost as good\nas sparse and extremely high-dimensional TF-IDF representation in spite of\nhaving less than 3% of the TF-IDF dimension. We also use a multimodal\nautoencoder for comparing products from different language-regions and show\npreliminary yet promising qualitative results." 
+ pub_date: { + seconds: 1505952000 + } + authors: "Arijit Biswas" + authors: "Mukul Bhutani" + authors: "Subhajit Sanyal" + } + video: { + video_id: "cpCS7LBRkRU" + video_title: "PR-099: MRNet-Product2Vec" + number_of_likes: 23 + number_of_views: 1342 + published_date: { + seconds: 1531661636 + } + uploader: "keun bong Kwak" + } + } +} +pr_id_to_video: { + key: 100 + value: { + pr_id: 100 + papers: { + paper_id: "f-brs-rethinking-backpropagating-refinement" + title: "f-BRS: Rethinking Backpropagating Refinement for Interactive Segmentation" + arxiv_id: "2001.10331" + abstract: "Deep neural networks have become a mainstream approach to interactive segmentation. As we show in our experiments, while for some images a trained network provides accurate segmentation result with just a few clicks, for some unknown objects it cannot achieve satisfactory result even with a large amount of user input. Recently proposed backpropagating refinement (BRS) scheme introduces an optimization problem for interactive segmentation that results in significantly better performance for the hard cases. At the same time, BRS requires running forward and backward pass through a deep network several times that leads to significantly increased computational budget per click compared to other methods. We propose f-BRS (feature backpropagating refinement scheme) that solves an optimization problem with respect to auxiliary variables instead of the network inputs, and requires running forward and backward pass just for a small part of a network. Experiments on GrabCut, Berkeley, DAVIS and SBD datasets set new state-of-the-art at an order of magnitude lower time per click compared to original BRS. The code and trained models are available at https://github.com/saic-vul/fbrs_interactive_segmentation ." + pub_date: { + seconds: 1580169600 + } + authors: "Konstantin Sofiiuk" + authors: "Ilia Petrov" + authors: "Olga Barinova" + authors: "Anton Konushin" + repositories: { + url: "https://github.com/jpconnel/fbrs-segmentation" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "f-brs segmentation modification for Hololens" + } + repositories: { + is_official: true + url: "https://github.com/saic-vul/fbrs_interactive_segmentation" + framework: FRAMEWORK_PYTORCH + number_of_stars: 400 + description: "[CVPR2020] f-BRS: Rethinking Backpropagating Refinement for Interactive Segmentation https://arxiv.org/abs/2001.10331" + } + methods: { + name: "Spatial Broadcast Decoder" + full_name: "Spatial Broadcast Decoder" + description: "Spatial Broadcast Decoder is an architecture that aims to improve disentangling, reconstruction accuracy, and generalization to held-out regions in data space. 
It provides a particularly dramatic\r\nbenefit when applied to datasets with small objects.\r\n\r\nSource: [Watters et al.](https://arxiv.org/pdf/1901.07017v2.pdf)\r\n\r\nImage source: [Watters et al.](https://arxiv.org/pdf/1901.07017v2.pdf)" + } + } + video: { + video_id: "ksTkCecBTCY" + video_title: "PR100: SeedNet" + number_of_likes: 11 + number_of_views: 1043 + published_date: { + seconds: 1532265921 + } + uploader: "이광희" + } + } +} +pr_id_to_video: { + key: 101 + value: { + pr_id: 101 + papers: { + paper_id: "deep-feature-consistent-variational" + title: "Deep Feature Consistent Variational Autoencoder" + arxiv_id: "1610.00291" + abstract: "We present a novel method for constructing Variational Autoencoder (VAE).\nInstead of using pixel-by-pixel loss, we enforce deep feature consistency\nbetween the input and the output of a VAE, which ensures the VAE's output to\npreserve the spatial correlation characteristics of the input, thus leading the\noutput to have a more natural visual appearance and better perceptual quality.\nBased on recent deep learning works such as style transfer, we employ a\npre-trained deep convolutional neural network (CNN) and use its hidden features\nto define a feature perceptual loss for VAE training. Evaluated on the CelebA\nface dataset, we show that our model produces better results than other methods\nin the literature. We also show that our method can produce latent vectors that\ncan capture the semantic information of face expressions and can be used to\nachieve state-of-the-art performance in facial attribute prediction." + pub_date: { + seconds: 1475366400 + } + authors: "Xianxu Hou" + authors: "Linlin Shen" + authors: "Ke Sun" + authors: "Guoping Qiu" + repositories: { + url: "https://github.com/svenrdz/DFC-VAE" + framework: FRAMEWORK_PYTORCH + number_of_stars: 11 + description: "Deep Feature Consistent Variational AutoEncoder (Pytorch)" + } + repositories: { + url: "https://github.com/nmichlo/disent" + framework: FRAMEWORK_PYTORCH + number_of_stars: 21 + description: "🧶 Modular VAE Disentanglement Framework built with PyTorch Lightning. Optionally configured and run with Hydra Config." 
+ } + repositories: { + url: "https://github.com/bhpfelix/Variational-Autoencoder-PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 144 + description: "Variational Autoencoder implemented with PyTorch, Trained over CelebA Dataset" + } + repositories: { + url: "https://github.com/UdbhavPrasad072300/Generate-Fake-Faces-with-CVAE-in-PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Making fake faces with a Convolutional Variational Autoencoder in PyTorch with celebA dataset" + } + repositories: { + url: "https://github.com/bogedy/intro_dfc" + framework: FRAMEWORK_TENSORFLOW + description: "Introspective Deep Feature Consistent Variational Autoencoder" + } + repositories: { + url: "https://github.com/peria1/VAEconvMNIST" + framework: FRAMEWORK_PYTORCH + description: "Basic Pytorch VAE adapted to use conv2d on MNIST" + } + repositories: { + url: "https://github.com/Nanway/dfc-vae" + framework: FRAMEWORK_PYTORCH + description: "I turned my friends into dogs and made computer generated images of them with this deep feature consistent variational autoencoder" + } + repositories: { + url: "https://github.com/inkplatform/beta-vae" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/vinoth654321/Beta-Vae-face-dataset" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/matthew-liu/beta-vae" + framework: FRAMEWORK_PYTORCH + number_of_stars: 30 + description: "A Pytorch Implementation of the Beta-VAE" + } + methods: { + name: "VAE" + full_name: "Variational Autoencoder" + description: "A **Variational Autoencoder** is a type of likelihood-based generative model. It consists of an encoder, that takes in data $x$ as input and transforms this into a latent representation $z$, and a decoder, that takes a latent representation $z$ and returns a reconstruction $\\hat{x}$. Inference is performed via variational inference to approximate the posterior of the model." + } + methods: { + name: "AutoEncoder" + full_name: "AutoEncoder" + description: "An **Autoencoder** is a bottleneck architecture that turns a high-dimensional input into a latent low-dimensional code (encoder), and then performs a reconstruction of the input with this latent code (the decoder).\r\n\r\nImage: [Michael Massi](https://en.wikipedia.org/wiki/Autoencoder#/media/File:Autoencoder_schema.png)" + } + } + video: { + video_id: "FfBp6xJqZVA" + video_title: "PR-101: Deep Feature Consistent Variational Autoencoder" + number_of_likes: 34 + number_of_views: 9166 + published_date: { + seconds: 1536508427 + } + uploader: "MinGuk Kang" + } + } +} +pr_id_to_video: { + key: 102 + value: { + pr_id: 102 + papers: { + paper_id: "everybody-dance-now" + title: "Everybody Dance Now" + arxiv_id: "1808.07371" + abstract: "This paper presents a simple method for \"do as I do\" motion transfer: given a source video of a person dancing, we can transfer that performance to a novel (amateur) target after only a few minutes of the target subject performing standard moves. We approach this problem as video-to-video translation using pose as an intermediate representation. To transfer the motion, we extract poses from the source subject and apply the learned pose-to-appearance mapping to generate the target subject. We predict two consecutive frames for temporally coherent video results and introduce a separate pipeline for realistic face synthesis. 
Although our method is quite simple, it produces surprisingly compelling results (see video). This motivates us to also provide a forensics tool for reliable synthetic content detection, which is able to distinguish videos synthesized by our system from real data. In addition, we release a first-of-its-kind open-source dataset of videos that can be legally used for training and motion transfer." + pub_date: { + seconds: 1534896000 + } + authors: "Caroline Chan" + authors: "Shiry Ginosar" + authors: "Tinghui Zhou" + authors: "Alexei A. Efros" + repositories: { + url: "https://github.com/justinjohn0306/EverybodyDanceNow-Colab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "Motion Retargeting Video Subjects, Modified Colab Version by Justin John" + } + repositories: { + url: "https://github.com/j-void/ISL_v2v" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/rajatsahay/Pose2Pose" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Research Paper Implementation" + } + repositories: { + url: "https://github.com/martin220485/everybody_dance_now_pytorch" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/CNC-IISER-BHOPAL/Any-Body-Can-Dance" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + } + repositories: { + url: "https://github.com/aman-arya/Any-Body-Can-Dance" + framework: FRAMEWORK_PYTORCH + } + repositories: { + is_official: true + url: "https://github.com/carolineec/EverybodyDanceNow" + framework: FRAMEWORK_PYTORCH + number_of_stars: 411 + description: "Motion Retargeting Video Subjects" + } + repositories: { + url: "https://github.com/Lotayou/everybody_dance_now_pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 240 + description: "A PyTorch Implementation of \"Everybody Dance Now\" from Berkeley AI lab." + } + repositories: { + url: "https://github.com/wjy5446/pytorch-everybody-dance-now" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + description: ":dancer: Dance Now !!!" + } + repositories: { + url: "https://github.com/dakenan1/Everybody-Dance-Now" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "Implementation of Everybody dance now via tensorflow" + } + } + video: { + video_id: "_onRnCb_h3Q" + video_title: "PR-102: Everybody Dance Now" + number_of_views: 1367 + published_date: { + seconds: 1536505303 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 103 + value: { + pr_id: 103 + papers: { + paper_id: "an-analysis-of-the-t-sne-algorithm-for-data" + title: "An Analysis of the t-SNE Algorithm for Data Visualization" + arxiv_id: "1803.01768" + abstract: "A first line of attack in exploratory data analysis is data visualization,\ni.e., generating a 2-dimensional representation of data that makes clusters of\nsimilar points visually identifiable. Standard Johnson-Lindenstrauss\ndimensionality reduction does not produce data visualizations. The t-SNE\nheuristic of van der Maaten and Hinton, which is based on non-convex\noptimization, has become the de facto standard for visualization in a wide\nrange of applications.\n This work gives a formal framework for the problem of data visualization -\nfinding a 2-dimensional embedding of clusterable data that correctly separates\nindividual clusters to make them visually identifiable. 
We then give a rigorous\nanalysis of the performance of t-SNE under a natural, deterministic condition\non the \"ground-truth\" clusters (similar to conditions assumed in earlier\nanalyses of clustering) in the underlying data. These are the first provable\nguarantees on t-SNE for constructing good data visualizations.\n We show that our deterministic condition is satisfied by considerably general\nprobabilistic generative models for clusterable data such as mixtures of\nwell-separated log-concave distributions. Finally, we give theoretical evidence\nthat t-SNE provably succeeds in partially recovering cluster structure even\nwhen the above deterministic condition is not met." + pub_date: { + seconds: 1520208000 + } + authors: "Sanjeev Arora" + authors: "Wei Hu" + authors: "Pravesh K. Kothari" + methods: { + name: "LINE" + full_name: "Large-scale Information Network Embedding" + description: "LINE is a novel network embedding method which is suitable for arbitrary types of information networks: undirected, directed, and/or weighted. The method optimizes a carefully designed objective function that preserves both the local and global network structures.\r\n\r\nSource: [Tang et al.](https://arxiv.org/pdf/1503.03578v1.pdf)\r\n\r\nImage source: [Tang et al.](https://arxiv.org/pdf/1503.03578v1.pdf)" + } + } + video: { + video_id: "zpJwm7f7EXs" + video_title: "PR-103: Visualizing Data using t-SNE" + number_of_likes: 36 + number_of_views: 2874 + published_date: { + seconds: 1537108725 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 104 + value: { + pr_id: 104 + papers: { + paper_id: "video-to-video-synthesis" + title: "Video-to-Video Synthesis" + arxiv_id: "1808.06601" + abstract: "We study the problem of video-to-video synthesis, whose goal is to learn a\nmapping function from an input source video (e.g., a sequence of semantic\nsegmentation masks) to an output photorealistic video that precisely depicts\nthe content of the source video. While its image counterpart, the\nimage-to-image synthesis problem, is a popular topic, the video-to-video\nsynthesis problem is less explored in the literature. Without understanding\ntemporal dynamics, directly applying existing image synthesis approaches to an\ninput video often results in temporally incoherent videos of low visual\nquality. In this paper, we propose a novel video-to-video synthesis approach\nunder the generative adversarial learning framework. Through carefully-designed\ngenerator and discriminator architectures, coupled with a spatio-temporal\nadversarial objective, we achieve high-resolution, photorealistic, temporally\ncoherent video results on a diverse set of input formats including segmentation\nmasks, sketches, and poses. Experiments on multiple benchmarks show the\nadvantage of our method compared to strong baselines. In particular, our model\nis capable of synthesizing 2K resolution videos of street scenes up to 30\nseconds long, which significantly advances the state-of-the-art of video\nsynthesis. Finally, we apply our approach to future video prediction,\noutperforming several state-of-the-art competing systems." 
+ pub_date: { + seconds: 1534723200 + } + authors: "Ting-Chun Wang" + authors: "Ming-Yu Liu" + authors: "Jun-Yan Zhu" + authors: "Guilin Liu" + authors: "Andrew Tao" + authors: "Jan Kautz" + authors: "Bryan Catanzaro" + repositories: { + url: "https://github.com/play166/vid2vid" + framework: FRAMEWORK_PYTORCH + description: "make myself for building successful" + } + repositories: { + url: "https://github.com/MadRabbit-jt/vid2vid" + framework: FRAMEWORK_PYTORCH + description: "make myself for building successful" + } + repositories: { + url: "https://github.com/divyanshpuri02/divyansh.github.io" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/divyanshpuri02/Nvidia" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/BUTIYO/vid2vid-test" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/Sjunna9819/My-First-Project" + framework: FRAMEWORK_PYTORCH + } + repositories: { + is_official: true + url: "https://github.com/NVIDIA/vid2vid" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7747 + description: "Pytorch implementation of our method for high-resolution (e.g. 2048x1024) photorealistic video-to-video translation." + } + repositories: { + url: "https://github.com/eric-erki/vid2vid" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "Pytorch implementation of our method for high-resolution (e.g. 2048x1024) photorealistic video-to-video translation." + } + repositories: { + url: "https://github.com/freedombenLiu/vid2vid" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/yawayo/vid2vid" + framework: FRAMEWORK_PYTORCH + } + } + video: { + video_id: "WxeeqxqnRyE" + video_title: "PR-104: Video-to-Video synthesis" + number_of_likes: 16 + number_of_views: 1607 + published_date: { + seconds: 1537107746 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 105 + value: { + pr_id: 105 + papers: { + paper_id: "mnasnet-platform-aware-neural-architecture" + title: "MnasNet: Platform-Aware Neural Architecture Search for Mobile" + arxiv_id: "1807.11626" + abstract: "Designing convolutional neural networks (CNN) for mobile devices is challenging because mobile models need to be small and fast, yet still accurate. Although significant efforts have been dedicated to design and improve mobile CNNs on all dimensions, it is very difficult to manually balance these trade-offs when there are so many architectural possibilities to consider. In this paper, we propose an automated mobile neural architecture search (MNAS) approach, which explicitly incorporate model latency into the main objective so that the search can identify a model that achieves a good trade-off between accuracy and latency. Unlike previous work, where latency is considered via another, often inaccurate proxy (e.g., FLOPS), our approach directly measures real-world inference latency by executing the model on mobile phones. To further strike the right balance between flexibility and search space size, we propose a novel factorized hierarchical search space that encourages layer diversity throughout the network. Experimental results show that our approach consistently outperforms state-of-the-art mobile CNN models across multiple vision tasks. 
On the ImageNet classification task, our MnasNet achieves 75.2% top-1 accuracy with 78ms latency on a Pixel phone, which is 1.8x faster than MobileNetV2 [29] with 0.5% higher accuracy and 2.3x faster than NASNet [36] with 1.2% higher accuracy. Our MnasNet also achieves better mAP quality than MobileNets for COCO object detection. Code is at https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet" + pub_date: { + seconds: 1532995200 + } + authors: "Mingxing Tan" + authors: "Bo Chen" + authors: "Ruoming Pang" + authors: "Vijay Vasudevan" + authors: "Mark Sandler" + authors: "Andrew Howard" + authors: "Quoc V. Le" + repositories: { + url: "https://github.com/osmr/imgclsmob" + framework: FRAMEWORK_OTHERS + number_of_stars: 2202 + description: "Sandbox for training deep learning networks" + } + repositories: { + is_official: true + url: "https://github.com/tensorflow/tpu" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4338 + description: "Reference models and tools for Cloud TPUs." + } + repositories: { + url: "https://github.com/abhoi/Keras-MnasNet" + framework: FRAMEWORK_OTHERS + number_of_stars: 8 + description: "A Keras implementation of MnasNet" + } + repositories: { + url: "https://github.com/PotatoSpudowski/CactiNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Pytorch Implementation of a CNN similar to Google Brain's new EfficientNet from scratch to identify images of cactus🌵" + } + repositories: { + url: "https://github.com/mingxingtan/mnasnet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 26 + description: "MnasNet snapshot" + } + repositories: { + url: "https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4338 + description: "Reference models and tools for Cloud TPUs." + } + repositories: { + url: "https://github.com/nsarang/MnasNet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "A TensorFlow 2.0 implementation of MnasNet: Platform-Aware Neural Architecture Search for Mobile." + } + repositories: { + url: "https://github.com/cgebbe/kaggle_pku-autonomous-driving" + framework: FRAMEWORK_PYTORCH + description: "Code for kaggle competition https://www.kaggle.com/c/pku-autonomous-driving" + } + repositories: { + url: "https://github.com/rwightman/gen-efficientnet-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1343 + description: "Pretrained EfficientNet, EfficientNet-Lite, MixNet, MobileNetV3 / V2, MNASNet A1 and B1, FBNet, Single-Path NAS" + } + repositories: { + url: "https://github.com/azamatkhid/mnasnet-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "Pytorch implementation of MnasNet-A1 & MnasNet-B1" + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. 
It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "LSTM" + full_name: "Long Short-Term Memory" + description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" + } + methods: { + name: "MnasNet" + full_name: "MnasNet" + description: "**MnasNet** is a type of convolutional neural network optimized for mobile devices that is discovered through mobile neural architecture search, which explicitly incorporates model latency into the main objective so that the search can identify a model that achieves a good trade-off between accuracy and latency. The main building block is an inverted residual block (from [MobileNetV2](https://paperswithcode.com/method/mobilenetv2))." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." 
+ } + methods: { + name: "Random Horizontal Flip" + full_name: "Random Horizontal Flip" + description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + methods: { + name: "Depthwise Separable Convolution" + full_name: "Depthwise Separable Convolution" + description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" + } + methods: { + name: "RMSProp" + full_name: "RMSProp" + description: "**RMSProp** is an unpublished adaptive learning rate optimizer [proposed by Geoff Hinton](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf). The motivation is that the magnitude of gradients can differ for different weights, and can change during learning, making it hard to choose a single global learning rate. RMSProp tackles this by keeping a moving average of the squared gradient and adjusting the weight updates by this magnitude. The gradient updates are performed as:\r\n\r\n$$E\\left[g^{2}\\right]\\_{t} = \\gamma E\\left[g^{2}\\right]\\_{t-1} + \\left(1 - \\gamma\\right) g^{2}\\_{t}$$\r\n\r\n$$\\theta\\_{t+1} = \\theta\\_{t} - \\frac{\\eta}{\\sqrt{E\\left[g^{2}\\right]\\_{t} + \\epsilon}}g\\_{t}$$\r\n\r\nHinton suggests $\\gamma=0.9$, with a good default for $\\eta$ as $0.001$.\r\n\r\nImage: [Alec Radford](https://twitter.com/alecrad)" + } + methods: { + name: "Depthwise Convolution" + full_name: "Depthwise Convolution" + description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "Tanh Activation" + full_name: "Tanh Activation" + description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. 
But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" + } + } + video: { + video_id: "4uDZxefPd-I" + video_title: "PR-105: MnasNet: Platform-Aware Neural Architecture Search for Mobile" + number_of_likes: 23 + number_of_views: 1974 + published_date: { + seconds: 1538623331 + } + uploader: "Taekmin Kim" + } + } +} +pr_id_to_video: { + key: 106 + value: { + pr_id: 106 + papers: { + paper_id: "learning-to-explain-an-information-theoretic" + title: "Learning to Explain: An Information-Theoretic Perspective on Model Interpretation" + arxiv_id: "1802.07814" + abstract: "We introduce instancewise feature selection as a methodology for model\ninterpretation. Our method is based on learning a function to extract a subset\nof features that are most informative for each given example. This feature\nselector is trained to maximize the mutual information between selected\nfeatures and the response variable, where the conditional distribution of the\nresponse variable given the input is the model to be explained. We develop an\nefficient variational approximation to the mutual information, and show the\neffectiveness of our method on a variety of synthetic and real data sets using\nboth quantitative metrics and human evaluation." + pub_date: { + seconds: 1519171200 + } + authors: "Jianbo Chen" + authors: "Le Song" + authors: "Martin J. Wainwright" + authors: "Michael I. Jordan" + repositories: { + url: "https://github.com/vikua/l2x" + framework: FRAMEWORK_TENSORFLOW + description: "Experiments for implementation of \"Learning to Explain\" paper: https://arxiv.org/abs/1802.07814" + } + repositories: { + is_official: true + url: "https://github.com/Jianbo-Lab/L2X" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 89 + } + } + video: { + video_id: "id_CmUaTWpg" + video_title: "PR-106: Learning to Explain: An Information-Theoretic Perspective on Model Interpretation" + number_of_likes: 11 + number_of_views: 1138 + published_date: { + seconds: 1538321661 + } + uploader: "Jinsung Yoon" + } + } +} +pr_id_to_video: { + key: 107 + value: { + pr_id: 107 + papers: { + paper_id: "image-inpainting-for-irregular-holes-using" + title: "Image Inpainting for Irregular Holes Using Partial Convolutions" + arxiv_id: "1804.07723" + abstract: "Existing deep learning based image inpainting methods use a standard\nconvolutional network over the corrupted image, using convolutional filter\nresponses conditioned on both valid pixels as well as the substitute values in\nthe masked holes (typically the mean value). This often leads to artifacts such\nas color discrepancy and blurriness. Post-processing is usually used to reduce\nsuch artifacts, but are expensive and may fail. We propose the use of partial\nconvolutions, where the convolution is masked and renormalized to be\nconditioned on only valid pixels. We further include a mechanism to\nautomatically generate an updated mask for the next layer as part of the\nforward pass. Our model outperforms other methods for irregular masks. We show\nqualitative and quantitative comparisons with other methods to validate our\napproach." + pub_date: { + seconds: 1524182400 + } + authors: "Guilin Liu" + authors: "Fitsum A. Reda" + authors: "Kevin J. 
Shih" + authors: "Ting-Chun Wang" + authors: "Andrew Tao" + authors: "Bryan Catanzaro" + repositories: { + url: "https://github.com/feixuetuba/inpating" + framework: FRAMEWORK_PYTORCH + description: "复现Image Inpainting for Irregular Holes Using Partial Convolutions" + } + repositories: { + url: "https://github.com/jshi31/edge-connect" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/ayulockin/deepimageinpainting" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 15 + description: "Deep Image Inpainting using UNET like Vanilla Autoencoder and Partial Convolution based Autoencoder. " + } + repositories: { + url: "https://github.com/KPMG-wiseuniv/AI" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "for AI" + } + repositories: { + url: "https://github.com/hiyaroy12/DFT_inpainting" + framework: FRAMEWORK_PYTORCH + number_of_stars: 11 + description: "Image inpainting using frequency domain priors" + } + repositories: { + url: "https://github.com/yashk2000/SneakySketchers" + framework: FRAMEWORK_PYTORCH + number_of_stars: 14 + description: "A python desktop application that allows you to do image inpainting by directly drawing on it. " + } + repositories: { + url: "https://github.com/preeti-2810/object-removal" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/Maouriyan/inpainting_demo" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/chefpr7/Image-Inpainting-using-Partial-Convolutional-Layers" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/waallf/edge-connect-master" + framework: FRAMEWORK_PYTORCH + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + } + video: { + video_id: "BhZN6AqfylA" + video_title: "PR-107: Image Inpainting for Irregular Holes Using Partial Convolutions" + number_of_likes: 26 + number_of_views: 2715 + published_date: { + seconds: 1539060135 + } + uploader: "정지원" + } + } +} +pr_id_to_video: { + key: 108 + value: { + pr_id: 108 + papers: { + paper_id: "mobilenetv2-inverted-residuals-and-linear" + title: "MobileNetV2: Inverted Residuals and Linear Bottlenecks" + arxiv_id: "1801.04381" + abstract: "In this paper we describe a new mobile architecture, MobileNetV2, that\nimproves the state of the art performance of mobile models on multiple tasks\nand benchmarks as well as across a spectrum of different model sizes. We also\ndescribe efficient ways of applying these mobile models to object detection in\na novel framework we call SSDLite. 
Additionally, we demonstrate how to build\nmobile semantic segmentation models through a reduced form of DeepLabv3 which\nwe call Mobile DeepLabv3.\n The MobileNetV2 architecture is based on an inverted residual structure where\nthe input and output of the residual block are thin bottleneck layers opposite\nto traditional residual models which use expanded representations in the input\nan MobileNetV2 uses lightweight depthwise convolutions to filter features in\nthe intermediate expansion layer. Additionally, we find that it is important to\nremove non-linearities in the narrow layers in order to maintain\nrepresentational power. We demonstrate that this improves performance and\nprovide an intuition that led to this design. Finally, our approach allows\ndecoupling of the input/output domains from the expressiveness of the\ntransformation, which provides a convenient framework for further analysis. We\nmeasure our performance on Imagenet classification, COCO object detection, VOC\nimage segmentation. We evaluate the trade-offs between accuracy, and number of\noperations measured by multiply-adds (MAdd), as well as the number of\nparameters" + pub_date: { + seconds: 1515801600 + } + authors: "Mark Sandler" + authors: "Andrew Howard" + authors: "Menglong Zhu" + authors: "Andrey Zhmoginov" + authors: "Liang-Chieh Chen" + repositories: { + url: "https://github.com/espressif/esp-who" + framework: FRAMEWORK_OTHERS + number_of_stars: 1058 + description: "Face detection and recognition framework" + } + repositories: { + url: "https://github.com/Gideon0805/Tensorflow_Model_Pruning" + framework: FRAMEWORK_TENSORFLOW + description: "Pruning for TF1.5" + } + repositories: { + url: "https://github.com/akrapukhin/MobileNetV3" + framework: FRAMEWORK_PYTORCH + description: "An implementation of the MobileNetV3 models in Pytorch with scripts for training, testing and measuring latency." + } + repositories: { + url: "https://github.com/tensorflow/models/tree/master/research/deeplab" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70341 + description: "Models and examples built with TensorFlow" + } + repositories: { + url: "https://github.com/pytorch/vision" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9309 + description: "Datasets, Transforms and Models specific to Computer Vision" + } + repositories: { + url: "https://github.com/stevensmiley1989/MrRobot" + framework: FRAMEWORK_TENSORFLOW + description: "This is a robot I designed in Fusion 360 and 3D printed with my FlashForge Creator Pro in PLA, Main Hardware: 1 x Raspberry Pi 3b, 3 x Arduinos with I2C, 5 x ultrasonic sensors, 4 x 60Kg Servos, 4 x 12V 200rpm DC motors, 1 x stepper motor for loading ammo into custom built coil gun. The coil gun uses 2 x 450V 1000uF Capacitors in parallel with a boost converter, yielding 380V maximum charge discharge from a 12V input, firing with a 1.2kV maximum peak non-repetitive surge current 1.1kA rated Thyristor SCR, Main Software: Uses TensorFlow and Python for Object Detection with some C++ for motor controls. The model used is a retrained Single Shot Detection MobileNet V2 algorithm trained on a toy reindeer. Signal processing allows proportional controller feedback to adjust movement of the robot for moving, aiming, and shooting. An application for IOS was written in Swift to control the robot as well, using Mosquito MQTT Broker for communication. 
" + } + repositories: { + url: "https://github.com/d-li14/mobilenetv2.pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 396 + description: "72.8% MobileNetV2 1.0 model on ImageNet and a spectrum of pre-trained MobileNetV2 models" + } + repositories: { + url: "https://github.com/lpirola13/flower-recognizer" + framework: FRAMEWORK_TENSORFLOW + description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." + } + repositories: { + url: "https://github.com/clairehester/face-mask-detector" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "GA DSI Capstone project - Face Mask Detection using Computer Vision and Machine Learning" + } + repositories: { + url: "https://github.com/tensorflow/models/tree/master/research/slim" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70341 + description: "Models and examples built with TensorFlow" + } + methods: { + name: "DeepLabv3" + full_name: "DeepLabv3" + description: "**DeepLabv3** is a semantic segmentation architecture that improves upon DeepLabv2 with several modifications. To handle the problem of segmenting objects at multiple scales, modules are designed which employ atrous convolution in cascade or in parallel to capture multi-scale context by adopting multiple atrous rates. Furthermore, the Atrous Spatial Pyramid Pooling module from DeepLabv2 augmented with image-level features encoding global context and further boost performance. \r\n\r\nThe changes to the ASSP module are that the authors apply global average pooling on the last feature map of the model, feed the resulting image-level features to a 1 × 1 convolution with 256 filters (and batch normalization), and then bilinearly upsample the feature to the desired spatial dimension. In the\r\nend, the improved ASPP consists of (a) one 1×1 convolution and three 3 × 3 convolutions with rates = (6, 12, 18) when output stride = 16 (all with 256 filters and batch normalization), and (b) the image-level features.\r\n\r\nAnother interesting difference is that DenseCRF post-processing from DeepLabv2 is no longer needed." + } + methods: { + name: "Dilated Convolution" + full_name: "Dilated Convolution" + description: "**Dilated Convolutions** are a type of convolution that “inflate” the kernel by inserting holes between the kernel elements. An additional parameter $l$ (dilation rate) indicates how much the kernel is widened. There are usually $l-1$ spaces inserted between kernel elements. \r\n\r\nNote that concept has existed in past literature under different names, for instance the *algorithme a trous*, an algorithm for wavelet decomposition (Holschneider et al., 1987; Shensa, 1992)." + } + methods: { + name: "ReLU6" + full_name: "ReLU6" + description: "**ReLU6** is a modification of the [rectified linear unit](https://paperswithcode.com/method/relu) where we limit the activation to a maximum size of $6$. This is due to increased robustness when used with low-precision computation.\r\n\r\nImage Credit: [PyTorch](https://pytorch.org/docs/master/generated/torch.nn.ReLU6.html)" + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "RMSProp" + full_name: "RMSProp" + description: "**RMSProp** is an unpublished adaptive learning rate optimizer [proposed by Geoff Hinton](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf). The motivation is that the magnitude of gradients can differ for different weights, and can change during learning, making it hard to choose a single global learning rate. RMSProp tackles this by keeping a moving average of the squared gradient and adjusting the weight updates by this magnitude. The gradient updates are performed as:\r\n\r\n$$E\\left[g^{2}\\right]\\_{t} = \\gamma E\\left[g^{2}\\right]\\_{t-1} + \\left(1 - \\gamma\\right) g^{2}\\_{t}$$\r\n\r\n$$\\theta\\_{t+1} = \\theta\\_{t} - \\frac{\\eta}{\\sqrt{E\\left[g^{2}\\right]\\_{t} + \\epsilon}}g\\_{t}$$\r\n\r\nHinton suggests $\\gamma=0.9$, with a good default for $\\eta$ as $0.001$.\r\n\r\nImage: [Alec Radford](https://twitter.com/alecrad)" + } + methods: { + name: "Depthwise Separable Convolution" + full_name: "Depthwise Separable Convolution" + description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" + } + methods: { + name: "ASPP" + full_name: "Atrous Spatial Pyramid Pooling" + description: "**Atrous Spatial Pyramid Pooling (ASPP)** is a semantic segmentation module for resampling a given feature layer at multiple rates prior to convolution. This amounts to probing the original image with multiple filters that have complementary effective fields of view, thus capturing objects as well as useful image context at multiple scales. Rather than actually resampling features, the mapping is implemented using multiple parallel atrous convolutional layers with different sampling rates." + } + methods: { + name: "Depthwise Convolution" + full_name: "Depthwise Convolution" + description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. 
We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + } + video: { + video_id: "mT5Y-Zumbbw" + video_title: "PR-108: MobileNetV2: Inverted Residuals and Linear Bottlenecks" + number_of_likes: 67 + number_of_views: 8211 + published_date: { + seconds: 1540388729 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 109 + value: { + pr_id: 109 + papers: { + paper_id: "large-scale-gan-training-for-high-fidelity" + title: "Large Scale GAN Training for High Fidelity Natural Image Synthesis" + arxiv_id: "1809.11096" + abstract: "Despite recent progress in generative image modeling, successfully generating\nhigh-resolution, diverse samples from complex datasets such as ImageNet remains\nan elusive goal. To this end, we train Generative Adversarial Networks at the\nlargest scale yet attempted, and study the instabilities specific to such\nscale. We find that applying orthogonal regularization to the generator renders\nit amenable to a simple \"truncation trick,\" allowing fine control over the\ntrade-off between sample fidelity and variety by reducing the variance of the\nGenerator's input. Our modifications lead to models which set the new state of\nthe art in class-conditional image synthesis. When trained on ImageNet at\n128x128 resolution, our models (BigGANs) achieve an Inception Score (IS) of\n166.5 and Frechet Inception Distance (FID) of 7.4, improving over the previous\nbest IS of 52.52 and FID of 18.6." 
+ pub_date: { + seconds: 1538092800 + } + authors: "Andrew Brock" + authors: "Jeff Donahue" + authors: "Karen Simonyan" + repositories: { + url: "https://github.com/roberttwomey/machine-imagination-workshop" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "text to image notebook with CLIP for workshop on Machine Imagination, Spring 2021" + } + repositories: { + url: "https://github.com/notperquisites/bigsleep" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Personal Big Sleep AI Repo" + } + repositories: { + url: "https://github.com/lucidrains/big-sleep" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1148 + description: "A simple command line tool for text to image generation, using OpenAI's CLIP and a BigGAN. Technique was originally created by https://twitter.com/advadnoun" + } + repositories: { + url: "https://github.com/PacktPublishing/Hands-On-Image-Generation-with-TensorFlow-2.0/tree/master/Chapter08" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 62 + description: "Hands-On Image Generation with TensorFlow 2.0, published by Packt" + } + repositories: { + url: "https://github.com/yaxingwang/DeepI2I" + framework: FRAMEWORK_PYTORCH + number_of_stars: 18 + description: "Image-to-image translation, knowledge transfer" + } + repositories: { + url: "https://github.com/uoguelph-mlrg/instance_selection_for_gans" + framework: FRAMEWORK_PYTORCH + number_of_stars: 27 + description: "Official code repository for Instance Selection for GANs." + } + repositories: { + url: "https://github.com/minyoungg/pix2latent" + framework: FRAMEWORK_PYTORCH + number_of_stars: 151 + description: "Code for: Transforming and Projecting Images into Class-conditional Generative Networks" + } + repositories: { + url: "https://github.com/times2049/talkinghead" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/krisrjohnson/Realistic-Neural-Talking-Head-Models" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/amanjaiswal73892/changemypet" + framework: FRAMEWORK_PYTORCH + description: "Deep Learning Project" + } + methods: { + name: "GAN Hinge Loss" + full_name: "GAN Hinge Loss" + description: "The **GAN Hinge Loss** is a hinge loss based loss function for [generative adversarial networks](https://paperswithcode.com/methods/category/generative-adversarial-networks):\r\n\r\n$$ L\\_{D} = -\\mathbb{E}\\_{\\left(x, y\\right)\\sim{p}\\_{data}}\\left[\\min\\left(0, -1 + D\\left(x, y\\right)\\right)\\right] -\\mathbb{E}\\_{z\\sim{p\\_{z}}, y\\sim{p\\_{data}}}\\left[\\min\\left(0, -1 - D\\left(G\\left(z\\right), y\\right)\\right)\\right] $$\r\n\r\n$$ L\\_{G} = -\\mathbb{E}\\_{z\\sim{p\\_{z}}, y\\sim{p\\_{data}}}D\\left(G\\left(z\\right), y\\right) $$" + } + methods: { + name: "Feedforward Network" + full_name: "Feedforward Network" + description: "A **Feedforward Network**, or a **Multilayer Perceptron (MLP)**, is a neural network with solely densely connected layers. This is the classic neural network architecture of the literature. It consists of inputs $x$ passed through units $h$ (of which there can be many layers) to predict a target $y$. 
Activation functions are generally chosen to be non-linear to allow for flexible functional approximation.\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "Spectral Normalization" + full_name: "Spectral Normalization" + description: "**Spectral Normalization** is a normalization technique used for generative adversarial networks, used to stabilize training of the discriminator. Spectral normalization has the convenient property that the Lipschitz constant is the only hyper-parameter to be tuned.\r\n\r\nIt controls the Lipschitz constant of the discriminator $f$ by constraining the spectral norm of each layer $g : \\textbf{h}\\_{in} \\rightarrow \\textbf{h}_{out}$. The Lipschitz norm $\\Vert{g}\\Vert\\_{\\text{Lip}}$ is equal to $\\sup\\_{\\textbf{h}}\\sigma\\left(\\nabla{g}\\left(\\textbf{h}\\right)\\right)$, where $\\sigma\\left(a\\right)$ is the spectral norm of the matrix $A$ ($L\\_{2}$ matrix norm of $A$):\r\n\r\n$$ \\sigma\\left(a\\right) = \\max\\_{\\textbf{h}:\\textbf{h}\\neq{0}}\\frac{\\Vert{A\\textbf{h}}\\Vert\\_{2}}{\\Vert\\textbf{h}\\Vert\\_{2}} = \\max\\_{\\Vert\\textbf{h}\\Vert\\_{2}\\leq{1}}{\\Vert{A\\textbf{h}}\\Vert\\_{2}} $$\r\n\r\nwhich is equivalent to the largest singular value of $A$. Therefore for a linear layer $g\\left(\\textbf{h}\\right) = W\\textbf{h}$ the norm is given by $\\Vert{g}\\Vert\\_{\\text{Lip}} = \\sup\\_{\\textbf{h}}\\sigma\\left(\\nabla{g}\\left(\\textbf{h}\\right)\\right) = \\sup\\_{\\textbf{h}}\\sigma\\left(W\\right) = \\sigma\\left(W\\right) $. Spectral normalization normalizes the spectral norm of the weight matrix $W$ so it satisfies the Lipschitz constraint $\\sigma\\left(W\\right) = 1$:\r\n\r\n$$ \\bar{W}\\_{\\text{SN}}\\left(W\\right) = W / \\sigma\\left(W\\right) $$" + } + methods: { + name: "Residual Block" + full_name: "Residual Block" + description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." + } + methods: { + name: "Truncation Trick" + full_name: "Truncation Trick" + description: "The **Truncation Trick** is a latent sampling procedure for generative adversarial networks, where we sample $z$ from a truncated normal (where values which fall outside a range are resampled to fall inside that range). 
\r\nThe original implementation was in [Megapixel Size Image Creation with GAN](https://paperswithcode.com/paper/megapixel-size-image-creation-using).\r\nIn [BigGAN](http://paperswithcode.com/method/biggan), the authors find this provides a boost to the Inception Score and FID." + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "Off-Diagonal Orthogonal Regularization" + full_name: "Off-Diagonal Orthogonal Regularization" + description: "**Off-Diagonal Orthogonal Regularization** is a modified form of [orthogonal regularization](https://paperswithcode.com/method/orthogonal-regularization) originally used in [BigGAN](https://paperswithcode.com/method/biggan). The original orthogonal regularization is known to be limiting so the authors explore several variants designed to relax the constraint while still imparting the desired smoothness to the models. They opt for a modification where they remove diagonal terms from the regularization, and aim to minimize the pairwise cosine similarity between filters but does not constrain their norm:\r\n\r\n$$ R\\_{\\beta}\\left(W\\right) = \\beta|| W^{T}W \\odot \\left(\\mathbf{1}-I\\right) ||^{2}\\_{F} $$\r\n\r\nwhere $\\mathbf{1}$ denotes a matrix with all elements set to 1. The authors sweep $\\beta$ values and select $10^{−4}$." + } + methods: { + name: "Bottleneck Residual Block" + full_name: "Bottleneck Residual Block" + description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + } + methods: { + name: "TTUR" + full_name: "Two Time-scale Update Rule" + description: "The **Two Time-scale Update Rule (TTUR)** is an update rule for generative adversarial networks trained with stochastic gradient descent. TTUR has an individual learning rate for both the discriminator and the generator. The main premise is that the discriminator converges to a local minimum when the generator is fixed. 
If the generator changes slowly enough, then the discriminator still converges, since the generator perturbations are small. Besides ensuring convergence, the performance may also improve since the discriminator must first learn new patterns before they are transferred to the generator. In contrast, a generator which is overly fast, drives the discriminator steadily into new regions without capturing its gathered information." + } + methods: { + name: "Projection Discriminator" + full_name: "Projection Discriminator" + description: "A **Projection Discriminator** is a type of discriminator for generative adversarial networks. It is motivated by a probabilistic model in which the distribution of the conditional variable $\\textbf{y}$ given $\\textbf{x}$ is discrete or uni-modal continuous distributions.\r\n\r\nIf we look at the original solution for the loss function $\\mathcal{L}\\_{D}$ in the vanilla GANs, we can decompose it into the sum of two log-likelihood ratios:\r\n\r\n$$ f^{*}\\left(\\mathbf{x}, \\mathbf{y}\\right) = \\log\\frac{q\\left(\\mathbf{x}\\mid{\\mathbf{y}}\\right)q\\left(\\mathbf{y}\\right)}{p\\left(\\mathbf{x}\\mid{\\mathbf{y}}\\right)p\\left(\\mathbf{y}\\right)} = \\log\\frac{q\\left(\\mathbf{y}\\mid{\\mathbf{x}}\\right)}{p\\left(\\mathbf{y}\\mid{\\mathbf{x}}\\right)} + \\log\\frac{q\\left(\\mathbf{x}\\right)}{p\\left(\\mathbf{x}\\right)} = r\\left(\\mathbf{y\\mid{x}}\\right) + r\\left(\\mathbf{x}\\right) $$\r\n\r\nWe can model the log likelihood ratio $r\\left(\\mathbf{y\\mid{x}}\\right)$ and $r\\left(\\mathbf{x}\\right)$ by some parametric functions $f\\_{1}$ and $f\\_{2}$ respectively. If we make a standing assumption that $p\\left(y\\mid{x}\\right)$ and $q\\left(y\\mid{x}\\right)$ are simple distributions like those that are Gaussian or discrete log linear on the feature space, then the parametrization of the following form becomes natural:\r\n\r\n$$ f\\left(\\mathbf{x}, \\mathbf{y}; \\theta\\right) = f\\_{1}\\left(\\mathbf{x}, \\mathbf{y}; \\theta\\right) + f\\_{2}\\left(\\mathbf{x}; \\theta\\right) = \\mathbf{y}^{T}V\\phi\\left(\\mathbf{x}; \\theta\\_{\\phi}\\right) + \\psi\\left(\\phi(\\mathbf{x}; \\theta\\_{\\phi}); \\theta\\_{\\psi}\\right) $$\r\n\r\nwhere $V$ is the embedding matrix of $y$, $\\phi\\left(·, \\theta\\_{\\phi}\\right)$ is a vector output function of $x$, and $\\psi\\left(·, \\theta\\_{\\psi}\\right)$ is a scalar function of the same $\\phi\\left(\\mathbf{x}; \\theta\\_{\\phi}\\right)$ that appears in $f\\_{1}$. The learned parameters $\\theta = ${$V, \\theta\\_{\\phi}, \\theta\\_{\\psi}$} are trained to optimize the adversarial loss. This model of the discriminator is the projection." + } + } + video: { + video_id: "1f0faOeqDQ0" + video_title: "PR-109: Large Scale GAN Training for High Fidelity Natural Image Synthesis" + number_of_likes: 12 + number_of_views: 1207 + published_date: { + seconds: 1539797131 + } + uploader: "Young Seok Kim" + } + } +} +pr_id_to_video: { + key: 110 + value: { + pr_id: 110 + papers: { + paper_id: "an-analysis-of-scale-invariance-in-object-1" + title: "An Analysis of Scale Invariance in Object Detection - SNIP" + arxiv_id: "1711.08189" + abstract: "An analysis of different techniques for recognizing and detecting objects\nunder extreme scale variation is presented. Scale specific and scale invariant\ndesign of detectors are compared by training them with different configurations\nof input data. 
By evaluating the performance of different network architectures\nfor classifying small objects on ImageNet, we show that CNNs are not robust to\nchanges in scale. Based on this analysis, we propose to train and test\ndetectors on the same scales of an image-pyramid. Since small and large objects\nare difficult to recognize at smaller and larger scales respectively, we\npresent a novel training scheme called Scale Normalization for Image Pyramids\n(SNIP) which selectively back-propagates the gradients of object instances of\ndifferent sizes as a function of the image scale. On the COCO dataset, our\nsingle model performance is 45.7% and an ensemble of 3 networks obtains an mAP\nof 48.3%. We use off-the-shelf ImageNet-1000 pre-trained models and only train\nwith bounding box supervision. Our submission won the Best Student Entry in the\nCOCO 2017 challenge. Code will be made available at\n\\url{http://bit.ly/2yXVg4c}." + pub_date: { + seconds: 1511308800 + } + authors: "Bharat Singh" + authors: "Larry S. Davis" + methods: { + name: "ResNet" + full_name: "Residual Network" + description: "**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. Instead of hoping each few stacked layers directly fit a desired underlying mapping, residual nets let these layers fit a residual mapping. They stack [residual blocks](https://paperswithcode.com/method/residual-block) ontop of each other to form network: e.g. a ResNet-50 has fifty layers using these blocks. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}(x)$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}(x):=\\mathcal{H}(x)-x$. The original mapping is recast into $\\mathcal{F}(x)+x$.\r\n\r\nThere is empirical evidence that these types of network are easier to optimize, and can gain accuracy from considerably increased depth." + } + methods: { + name: "DPN" + full_name: "Dual Path Network" + description: "A **Dual Path Network (DPN)** is a convolutional neural network which presents a new topology of connection paths internally. The intuition is that [ResNets](https://paperswithcode.com/method/resnet) enables feature re-usage while DenseNet enables new feature exploration, and both are important for learning good representations. To enjoy the benefits from both path topologies, Dual Path Networks share common features while maintaining the flexibility to explore new features through dual path architectures. \r\n\r\nWe formulate such a dual path architecture as follows:\r\n\r\n$$x^{k} = \\sum\\limits\\_{t=1}^{k-1} f\\_t^{k}(h^t) \\text{,} $$\r\n\r\n$$\r\ny^{k} = \\sum\\limits\\_{t=1}^{k-1} v\\_t(h^t) = y^{k-1} + \\phi^{k-1}(y^{k-1}) \\text{,} \\\\\\\\\r\n$$\r\n\r\n$$\r\nr^{k} = x^{k} + y^{k} \\text{,} \\\\\\\\\r\n$$\r\n\r\n$$\r\nh^k = g^k \\left( r^{k} \\right) \\text{,}\r\n$$\r\n\r\nwhere $x^{k}$ and $y^{k}$ denote the extracted information at $k$-th step from individual path, $v_t(\\cdot)$ is a feature learning function as $f_t^k(\\cdot)$. The first equation refers to the densely connected path that enables exploring new features. The second equation refers to the residual path that enables common features re-usage. The third equation defines the dual path that integrates them and feeds them to the last transformation function in the last equation." 
+ } + methods: { + name: "RPN" + full_name: "Region Proposal Network" + description: "A **Region Proposal Network**, or **RPN**, is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals. RPN and algorithms like Fast R-CNN can be merged into a single network by sharing their convolutional features - using the recently popular terminology of neural networks with attention mechanisms, the RPN component tells the unified network where to look.\r\n\r\nRPNs are designed to efficiently predict region proposals with a wide range of scales and aspect ratios. RPNs use anchor boxes that serve as references at multiple scales and aspect ratios. The scheme can be thought of as a pyramid of regression references, which avoids enumerating images or filters of multiple scales or aspect ratios." + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." 
+ } + methods: { + name: "Concatenated Skip Connection" + full_name: "Concatenated Skip Connection" + description: "A **Concatenated Skip Connection** is a type of skip connection that seeks to reuse features by concatenating them to new layers, allowing more information to be retained from previous layers of the network. This contrasts with say, residual connections, where element-wise summation is used instead to incorporate information from previous layers. This type of skip connection is prominently used in DenseNets (and also Inception networks), which the Figure to the right illustrates." + } + methods: { + name: "Position-Sensitive RoI Pooling" + full_name: "Position-Sensitive RoI Pooling" + description: "**Position-Sensitive RoI Pooling layer** aggregates the outputs of the last convolutional layer and generates scores for each RoI. Unlike [RoI Pooling](https://paperswithcode.com/method/roi-pooling), PS RoI Pooling conducts selective pooling, and each of the $k$ × $k$ bin aggregates responses from only one score map out of the bank of $k$ × $k$ score maps. With end-to-end training, this RoI layer shepherds the last convolutional layer to learn specialized position-sensitive score maps." + } + methods: { + name: "Deformable Convolution" + full_name: "Deformable Convolution" + description: "**Deformable convolutions** add 2D offsets to the regular grid sampling locations in the standard convolution. It enables free form deformation of the sampling grid. The offsets are learned from the preceding feature maps, via additional convolutional layers. Thus, the deformation is conditioned on the input features in a local, dense, and adaptive manner." + } + methods: { + name: "Grouped Convolution" + full_name: "Grouped Convolution" + description: "A **Grouped Convolution** uses a group of convolutions - multiple kernels per layer - resulting in multiple channel outputs per layer. This leads to wider networks helping a network learn a varied set of low level and high level features. The original motivation of using Grouped Convolutions in [AlexNet](https://paperswithcode.com/method/alexnet) was to distribute the model over multiple GPUs as an engineering compromise. But later, with models such as [ResNeXt](https://paperswithcode.com/method/alexnet), it was shown this module could be used to improve classification accuracy. Specifically by exposing a new dimension through grouped convolutions, *cardinality* (the size of set of transformations), we can increase accuracy by increasing it." + } + } + video: { + video_id: "nimHWHxjBJ8" + video_title: "PR-110: An Analysis of Scale Invariance in Object Detection – SNIP" + number_of_likes: 14 + number_of_views: 1225 + published_date: { + seconds: 1540590955 + } + uploader: "visionNoob" + } + } +} +pr_id_to_video: { + key: 111 + value: { + pr_id: 111 + papers: { + paper_id: "eva2-exploiting-temporal-redundancy-in-live" + title: "EVA$^2$: Exploiting Temporal Redundancy in Live Computer Vision" + arxiv_id: "1803.06312" + abstract: "Hardware support for deep convolutional neural networks (CNNs) is critical to\nadvanced computer vision in mobile and embedded devices. Current designs,\nhowever, accelerate generic CNNs; they do not exploit the unique\ncharacteristics of real-time vision. We propose to use the temporal redundancy\nin natural video to avoid unnecessary computation on most frames. A new\nalgorithm, activation motion compensation, detects changes in the visual input\nand incrementally updates a previously-computed output. 
The technique takes\ninspiration from video compression and applies well-known motion estimation\ntechniques to adapt to visual changes. We use an adaptive key frame rate to\ncontrol the trade-off between efficiency and vision quality as the input\nchanges. We implement the technique in hardware as an extension to existing\nstate-of-the-art CNN accelerator designs. The new unit reduces the average\nenergy per frame by 54.2%, 61.7%, and 87.6% for three CNNs with less than 1%\nloss in vision accuracy." + pub_date: { + seconds: 1521158400 + } + authors: "Mark Buckler" + authors: "Philip Bedoukian" + authors: "Suren Jayasuriya" + authors: "Adrian Sampson" + } + video: { + video_id: "uwRz7PjVtB0" + video_title: "PR-111: EVA2:Exploiting Temporal Redundancy in Live Computer Vision" + number_of_likes: 23 + number_of_views: 798 + published_date: { + seconds: 1540137553 + } + uploader: "Soyeon Kim" + } + } +} +pr_id_to_video: { + key: 112 + value: { + pr_id: 112 + papers: { + paper_id: "a-tutorial-on-independent-component-analysis" + title: "A Tutorial on Independent Component Analysis" + arxiv_id: "1404.2986" + abstract: "Independent component analysis (ICA) has become a standard data analysis\ntechnique applied to an array of problems in signal processing and machine\nlearning. This tutorial provides an introduction to ICA based on linear algebra\nformulating an intuition for ICA from first principles. The goal of this\ntutorial is to provide a solid foundation on this advanced topic so that one\nmight learn the motivation behind ICA, learn why and when to apply this\ntechnique and in the process gain an introduction to this exciting field of\nactive research." + pub_date: { + seconds: 1397174400 + } + authors: "Jonathon Shlens" + repositories: { + url: "https://github.com/VU-BEAM-Lab/ADMIRE" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/bhaskar-agrawal/Independent-component-analysis" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/petteriTeikari/mixedImageSeparation" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "with FastICA (and icasso for robustness)" + } + methods: { + name: "ICA" + full_name: "Independent Component Analysis" + description: "_**Independent component analysis** (ICA) is a statistical and computational technique for revealing hidden factors that underlie sets of random variables, measurements, or signals._\r\n\r\n_ICA defines a generative model for the observed multivariate data, which is typically given as a large database of samples. In the model, the data variables are assumed to be linear mixtures of some unknown latent variables, and the mixing system is also unknown. The latent variables are assumed nongaussian and mutually independent, and they are called the independent components of the observed data. These independent components, also called sources or factors, can be found by ICA._\r\n\r\n_ICA is superficially related to principal component analysis and factor analysis. 
ICA is a much more powerful technique, however, capable of finding the underlying factors or sources when these classic methods fail completely._\r\n\r\n\r\nExtracted from (https://www.cs.helsinki.fi/u/ahyvarin/whatisica.shtml)\r\n\r\n**Source papers**:\r\n\r\n[Blind separation of sources, part I: An adaptive algorithm based on neuromimetic architecture](https://doi.org/10.1016/0165-1684(91)90079-X)\r\n\r\n[Independent component analysis, A new concept?](https://doi.org/10.1016/0165-1684(94)90029-9)\r\n\r\n[Independent component analysis: algorithms and applications](https://doi.org/10.1016/S0893-6080(00)00026-5)" + } + } + video: { + video_id: "mLSPA76qSuU" + } + } +} +pr_id_to_video: { + key: 113 + value: { + pr_id: 113 + papers: { + paper_id: "the-perception-distortion-tradeoff" + title: "The Perception-Distortion Tradeoff" + arxiv_id: "1711.06077" + abstract: "Image restoration algorithms are typically evaluated by some distortion measure (e.g. PSNR, SSIM, IFC, VIF) or by human opinion scores that quantify perceived perceptual quality. In this paper, we prove mathematically that distortion and perceptual quality are at odds with each other. Specifically, we study the optimal probability for correctly discriminating the outputs of an image restoration algorithm from real images. We show that as the mean distortion decreases, this probability must increase (indicating worse perceptual quality). As opposed to the common belief, this result holds true for any distortion measure, and is not only a problem of the PSNR or SSIM criteria. We also show that generative-adversarial-nets (GANs) provide a principled way to approach the perception-distortion bound. This constitutes theoretical support to their observed success in low-level vision tasks. Based on our analysis, we propose a new methodology for evaluating image restoration methods, and use it to perform an extensive comparison between recent super-resolution algorithms." + pub_date: { + seconds: 1510790400 + } + authors: "Yochai Blau" + authors: "Tomer Michaeli" + repositories: { + url: "https://github.com/roimehrez/PIRM2018" + framework: FRAMEWORK_OTHERS + number_of_stars: 186 + description: "Workshop and Challenge on Perceptual Image Restoration and Manipulation" + } + } + video: { + video_id: "6Yid4dituqo" + video_title: "PR-113: The Perception Distortion Tradeoff" + number_of_likes: 16 + number_of_views: 1364 + published_date: { + seconds: 1540734798 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 114 + value: { + pr_id: 114 + papers: { + paper_id: "recycle-gan-unsupervised-video-retargeting" + title: "Recycle-GAN: Unsupervised Video Retargeting" + arxiv_id: "1808.05174" + abstract: "We introduce a data-driven approach for unsupervised video retargeting that\ntranslates content from one domain to another while preserving the style native\nto a domain, i.e., if contents of John Oliver's speech were to be transferred\nto Stephen Colbert, then the generated content/speech should be in Stephen\nColbert's style. Our approach combines both spatial and temporal information\nalong with adversarial losses for content translation and style preservation.\nIn this work, we first study the advantages of using spatiotemporal constraints\nover spatial constraints for effective retargeting. We then demonstrate the\nproposed approach for the problems where information in both space and time\nmatters such as face-to-face translation, flower-to-flower, wind and cloud\nsynthesis, sunrise and sunset." 
+ pub_date: { + seconds: 1534291200 + } + authors: "Aayush Bansal" + authors: "Shugao Ma" + authors: "Deva Ramanan" + authors: "Yaser Sheikh" + repositories: { + url: "https://github.com/aayushbansal/Recycle-GAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 375 + description: "Unsupervised Video Retargeting (e.g. face to face, flower to flower, clouds and winds, sunrise and sunset)" + } + } + video: { + video_id: "eMZXUqmp_PU" + video_title: "PR-114: Recycle-GAN, Unsupervised Video Retargeting" + number_of_views: 1172 + published_date: { + seconds: 1540738223 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 115 + value: { + pr_id: 115 + papers: { + paper_id: "unsupervised-anomaly-detection-with" + title: "Unsupervised Anomaly Detection with Generative Adversarial Networks to Guide Marker Discovery" + arxiv_id: "1703.05921" + abstract: "Obtaining models that capture imaging markers relevant for disease\nprogression and treatment monitoring is challenging. Models are typically based\non large amounts of data with annotated examples of known markers aiming at\nautomating detection. High annotation effort and the limitation to a vocabulary\nof known markers limit the power of such approaches. Here, we perform\nunsupervised learning to identify anomalies in imaging data as candidates for\nmarkers. We propose AnoGAN, a deep convolutional generative adversarial network\nto learn a manifold of normal anatomical variability, accompanying a novel\nanomaly scoring scheme based on the mapping from image space to a latent space.\nApplied to new data, the model labels anomalies, and scores image patches\nindicating their fit into the learned distribution. Results on optical\ncoherence tomography images of the retina demonstrate that the approach\ncorrectly identifies anomalous images, such as images containing retinal fluid\nor hyperreflective foci." + pub_date: { + seconds: 1489708800 + } + authors: "Thomas Schlegl" + authors: "Philipp Seeböck" + authors: "Sebastian M. 
Waldstein" + authors: "Ursula Schmidt-Erfurth" + authors: "Georg Langs" + repositories: { + url: "https://github.com/YeongHyeon/f-AnoGAN-TF" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "TensorFlow implementation of f-AnoGAN (with MNIST dataset)" + } + repositories: { + url: "https://github.com/xtarx/Unsupervised-Anomaly-Detection-with-Generative-Adversarial-Networks" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 93 + description: "Unsupervised Anomaly Detection with Generative Adversarial Networks on MIAS dataset" + } + repositories: { + url: "https://github.com/NMADALI97/Learning-With-Wasserstein-Loss" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/seungjunlee96/AnoGAN-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "Pytorch implementation of \"Unsupervised Anomaly Detection with Generative Adversarial Networks to Guide Marker Discovery\"" + } + repositories: { + url: "https://github.com/mullue/anogan-mnist" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/fuchami/ANOGAN" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "anomaly detection using GAN" + } + repositories: { + url: "https://github.com/kosyoshida/simple-keras" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/LeeDoYup/AnoGAN" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 228 + description: "Unofficial Tensorflow Implementation of AnoGAN (Anomaly GAN)" + } + repositories: { + url: "https://github.com/tkwoo/anogan-keras" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 164 + description: "Unsupervised anomaly detection with generative model, keras implementation" + } + repositories: { + url: "https://github.com/Dai7Igarashi/Anomaly-Detection" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + } + } + video: { + video_id: "R0H0gqtnMyA" + video_title: "PR-115: Unsupervised Anomaly Detection with Generative Adversarial Networks" + number_of_likes: 35 + number_of_views: 3272 + published_date: { + seconds: 1541343064 + } + uploader: "MinGuk Kang" + } + } +} +pr_id_to_video: { + key: 116 + value: { + pr_id: 116 + papers: { + paper_id: "glow-generative-flow-with-invertible-1x1" + title: "Glow: Generative Flow with Invertible 1x1 Convolutions" + arxiv_id: "1807.03039" + abstract: "Flow-based generative models (Dinh et al., 2014) are conceptually attractive\ndue to tractability of the exact log-likelihood, tractability of exact\nlatent-variable inference, and parallelizability of both training and\nsynthesis. In this paper we propose Glow, a simple type of generative flow\nusing an invertible 1x1 convolution. Using our method we demonstrate a\nsignificant improvement in log-likelihood on standard benchmarks. Perhaps most\nstrikingly, we demonstrate that a generative model optimized towards the plain\nlog-likelihood objective is capable of efficient realistic-looking synthesis\nand manipulation of large images. The code for our model is available at\nhttps://github.com/openai/glow" + pub_date: { + seconds: 1531094400 + } + authors: "Diederik P. 
Kingma" + authors: "Prafulla Dhariwal" + repositories: { + url: "https://github.com/Naagar/Glow_NormalizingFlow_implimentation" + framework: FRAMEWORK_PYTORCH + description: "pyTorch implimentation of the Glow paper and Reimplementations of density estimation algorithms" + } + repositories: { + url: "https://github.com/Zhangyanbo/iResNetLab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "A python/pytorch package for invertible neural networks" + } + repositories: { + url: "https://github.com/Daniel-H-99/CRD" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/simonwestberg/Glow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "A replication of \"Glow: Generative Flow with Invertible 1×1 Convolutions\" and an investigation of its performance on Out-of-Distribution detection " + } + repositories: { + url: "https://github.com/simonwestberg/DD2412-Glow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "A replication of \"Glow: Generative Flow with Invertible 1×1 Convolutions\" and an investigation of its performance on Out-of-Distribution detection " + } + repositories: { + url: "https://github.com/samuelmat19/GLOW-tf2" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Glow: Generative Flow with Invertible 1x1 Convolutions in Tensorflow 2" + } + repositories: { + url: "https://github.com/L0SG/NanoFlow" + framework: FRAMEWORK_PYTORCH + number_of_stars: 60 + description: "PyTorch implementation of the paper \"NanoFlow: Scalable Normalizing Flows with Sublinear Parameter Complexity.\"" + } + repositories: { + url: "https://github.com/eyalbetzalel/GLOW2" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/ClaraBing/flow" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/rhychen/Glow" + framework: FRAMEWORK_PYTORCH + } + methods: { + name: "GLOW" + full_name: "GLOW" + description: "**GLOW** is a type of flow-based generative model that is based on an invertible $1 \\times 1$ convolution. This builds on the flows introduced by [NICE](https://paperswithcode.com/method/nice) and [RealNVP](https://paperswithcode.com/method/realnvp). It consists of a series of steps of flow, combined in a multi-scale architecture; see the Figure to the right. Each step of flow consists of Act Normalization followed by an *invertible $1 \\times 1$ convolution* followed by an affine coupling layer." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Normalizing Flows" + full_name: "Normalizing Flows" + description: "**Normalizing Flows** are a method for constructing complex distributions by transforming a\r\nprobability density through a series of invertible mappings. 
By repeatedly applying the rule for change of variables, the initial density ‘flows’ through the sequence of invertible mappings. At the end of this sequence we obtain a valid probability distribution and hence this type of flow is referred to as a normalizing flow.\r\n\r\nIn the case of finite flows, the basic rule for the transformation of densities considers an invertible, smooth mapping $f : \\mathbb{R}^{d} \\rightarrow \\mathbb{R}^{d}$ with inverse $f^{-1} = g$, i.e. the composition $g \\cdot f\\left(z\\right) = z$. If we use this mapping to transform a random variable $z$ with distribution $q\\left(z\\right)$, the resulting random variable $z' = f\\left(z\\right)$ has a distribution:\r\n\r\n$$ q\\left(\\mathbf{z}'\\right) = q\\left(\\mathbf{z}\\right)\\bigl\\vert{\\text{det}}\\frac{\\delta{f}^{-1}}{\\delta{\\mathbf{z'}}}\\bigr\\vert = q\\left(\\mathbf{z}\\right)\\bigl\\vert{\\text{det}}\\frac{\\delta{f}}{\\delta{\\mathbf{z}}}\\bigr\\vert ^{-1} $$\r\n\x0c\r\nwhere the last equality can be seen by applying the chain rule (inverse function theorem) and is a property of Jacobians of invertible functions. We can construct arbitrarily complex densities by composing several simple maps and successively applying the above equation. The density $q\\_{K}\\left(\\mathbf{z}\\right)$ obtained by successively transforming a random variable $z\\_{0}$ with distribution $q\\_{0}$ through a chain of $K$ transformations $f\\_{k}$ is:\r\n\r\n$$ z\\_{K} = f\\_{K} \\cdot \\dots \\cdot f\\_{2} \\cdot f\\_{1}\\left(z\\_{0}\\right) $$\r\n\r\n$$ \\ln{q}\\_{K}\\left(z\\_{K}\\right) = \\ln{q}\\_{0}\\left(z\\_{0}\\right) − \\sum^{K}\\_{k=1}\\ln\\vert\\det\\frac{\\delta{f\\_{k}}}{\\delta{\\mathbf{z\\_{k-1}}}}\\vert $$\r\n\x0c\r\nThe path traversed by the random variables $z\\_{k} = f\\_{k}\\left(z\\_{k-1}\\right)$ with initial distribution $q\\_{0}\\left(z\\_{0}\\right)$ is called the flow and the path formed by the successive distributions $q\\_{k}$ is a normalizing flow." + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "Affine Coupling" + full_name: "Affine Coupling" + description: "**Affine Coupling** is a method for implementing a normalizing flow (where we stack a sequence of invertible bijective transformation functions). Affine coupling is one of these bijective transformation functions. Specifically, it is an example of a reversible transformation where the forward function, the reverse function and the log-determinant are computationally efficient. For the forward function, we split the input dimension into two parts:\r\n\r\n$$ \\mathbf{x}\\_{a}, \\mathbf{x}\\_{b} = \\text{split}\\left(\\mathbf{x}\\right) $$\r\n\r\nThe second part stays the same $\\mathbf{x}\\_{b} = \\mathbf{y}\\_{b}$, while the first part $\\mathbf{x}\\_{a}$ undergoes an affine transformation, where the parameters for this transformation are learnt using the second part $\\mathbf{x}\\_{b}$ being put through a neural network. 
Together we have:\r\n\r\n$$ \\left(\\log{\\mathbf{s}, \\mathbf{t}}\\right) = \\text{NN}\\left(\\mathbf{x}\\_{b}\\right) $$\r\n\r\n$$ \\mathbf{s} = \\exp\\left(\\log{\\mathbf{s}}\\right) $$\r\n\r\n$$ \\mathbf{y}\\_{a} = \\mathbf{s} \\odot \\mathbf{x}\\_{a} + \\mathbf{t} $$\r\n\r\n$$ \\mathbf{y}\\_{b} = \\mathbf{x}\\_{b} $$\r\n\r\n$$ \\mathbf{y} = \\text{concat}\\left(\\mathbf{y}\\_{a}, \\mathbf{y}\\_{b}\\right) $$\r\n\r\nImage: [GLOW](https://arxiv.org/pdf/1807.03039.pdf)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "Invertible 1x1 Convolution" + full_name: "Invertible 1x1 Convolution" + description: "The **Invertible 1x1 Convolution** is a type of convolution used in flow-based generative models that reverses the ordering of channels. The weight matrix is initialized as a random rotation matrix. The log-determinant of an invertible 1 × 1 convolution of a $h \\times w \\times c$ tensor $h$ with $c \\times c$ weight matrix $\\mathbf{W}$ is straightforward to compute:\r\n\r\n$$ \\log | \\text{det}\\left(\\frac{d\\text{conv2D}\\left(\\mathbf{h};\\mathbf{W}\\right)}{d\\mathbf{h}}\\right) | = h \\cdot w \\cdot \\log | \\text{det}\\left(\\mathbf{W}\\right) | $$" + } + methods: { + name: "Activation Normalization" + full_name: "Activation Normalization" + description: "**Activation Normalization** is a type of normalization used for flow-based generative models; specifically it was introduced in the [GLOW](https://paperswithcode.com/method/glow) architecture. An ActNorm layer performs an affine transformation of the activations using a scale and bias parameter per channel, similar to batch normalization. These parameters are initialized such that the post-actnorm activations per-channel have zero mean and unit variance given an initial minibatch of data. 
This is a form of data dependent initilization. After initialization, the scale and bias are treated as regular trainable parameters that are independent of the data." + } + } + video: { + video_id: "6OVH1i2BVAE" + video_title: "PR-116: Glow: Generative Flow with Invertible 1x1 Convolutions" + number_of_likes: 21 + number_of_views: 2795 + published_date: { + seconds: 1541342135 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 117 + value: { + pr_id: 117 + papers: { + paper_id: "peernets-exploiting-peer-wisdom-against" + title: "PeerNets: Exploiting Peer Wisdom Against Adversarial Attacks" + arxiv_id: "1806.00088" + abstract: "Deep learning systems have become ubiquitous in many aspects of our lives.\nUnfortunately, it has been shown that such systems are vulnerable to\nadversarial attacks, making them prone to potential unlawful uses. Designing\ndeep neural networks that are robust to adversarial attacks is a fundamental\nstep in making such systems safer and deployable in a broader variety of\napplications (e.g. autonomous driving), but more importantly is a necessary\nstep to design novel and more advanced architectures built on new computational\nparadigms rather than marginally building on the existing ones. In this paper\nwe introduce PeerNets, a novel family of convolutional networks alternating\nclassical Euclidean convolutions with graph convolutions to harness information\nfrom a graph of peer samples. This results in a form of non-local forward\npropagation in the model, where latent features are conditioned on the global\nstructure induced by the graph, that is up to 3 times more robust to a variety\nof white- and black-box adversarial attacks compared to conventional\narchitectures with almost no drop in accuracy." + pub_date: { + seconds: 1527724800 + } + authors: "Jan Svoboda" + authors: "Jonathan Masci" + authors: "Federico Monti" + authors: "Michael M. Bronstein" + authors: "Leonidas Guibas" + repositories: { + url: "https://github.com/tantara/PeerNets-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 14 + description: "A pytorch implementation of 'PeerNets: Exploiting Peer Wisdom Against Adversarial Attacks'" + } + } + video: { + video_id: "VQsG_Yk9KuQ" + video_title: "PR-117: PeerNets: Exploiting Peer Wisdom Against Adversarial Attacks" + number_of_likes: 5 + number_of_views: 769 + published_date: { + seconds: 1542016335 + } + uploader: "Taekmin Kim" + } + } +} +pr_id_to_video: { + key: 118 + value: { + pr_id: 118 + papers: { + paper_id: "black-box-adversarial-attacks-with-limited" + title: "Black-box Adversarial Attacks with Limited Queries and Information" + arxiv_id: "1804.08598" + abstract: "Current neural network-based classifiers are susceptible to adversarial\nexamples even in the black-box setting, where the attacker only has query\naccess to the model. In practice, the threat model for real-world systems is\noften more restrictive than the typical black-box model where the adversary can\nobserve the full output of the network on arbitrarily many chosen inputs. We\ndefine three realistic threat models that more accurately characterize many\nreal-world classifiers: the query-limited setting, the partial-information\nsetting, and the label-only setting. We develop new attacks that fool\nclassifiers under these more restrictive threat models, where previous methods\nwould be impractical or ineffective. We demonstrate that our methods are\neffective against an ImageNet classifier under our proposed threat models. 
We\nalso demonstrate a targeted black-box attack against a commercial classifier,\novercoming the challenges of limited query access, partial information, and\nother practical issues to break the Google Cloud Vision API." + pub_date: { + seconds: 1524441600 + } + authors: "Andrew Ilyas" + authors: "Logan Engstrom" + authors: "Anish Athalye" + authors: "Jessy Lin" + repositories: { + is_official: true + url: "https://github.com/labsix/limited-blackbox-attacks" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 140 + description: "Code for \"Black-box Adversarial Attacks with Limited Queries and Information\" (http://arxiv.org/abs/1804.08598)" + } + repositories: { + url: "https://github.com/mllab-adv-attack/lazy-attack" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } + } + video: { + video_id: "AMPpOFtg3Q4" + video_title: "PR-118: Black-Box Attacks with Limited Queries and Information" + number_of_likes: 2 + number_of_views: 419 + published_date: { + seconds: 1541943972 + } + uploader: "Jinsung Yoon" + } + } +} +pr_id_to_video: { + key: 119 + value: { + pr_id: 119 + papers: { + paper_id: "active-learning-for-convolutional-neural" + title: "Active Learning for Convolutional Neural Networks: A Core-Set Approach" + arxiv_id: "1708.00489" + abstract: "Convolutional neural networks (CNNs) have been successfully applied to many\nrecognition and learning tasks using a universal recipe; training a deep model\non a very large dataset of supervised examples. However, this approach is\nrather restrictive in practice since collecting a large set of labeled images\nis very expensive. One way to ease this problem is coming up with smart ways\nfor choosing images to be labelled from a very large collection (ie. active\nlearning).\n Our empirical study suggests that many of the active learning heuristics in\nthe literature are not effective when applied to CNNs in batch setting.\nInspired by these limitations, we define the problem of active learning as\ncore-set selection, ie. choosing set of points such that a model learned over\nthe selected subset is competitive for the remaining data points. We further\npresent a theoretical result characterizing the performance of any selected\nsubset using the geometry of the datapoints. As an active learning algorithm,\nwe choose the subset which is expected to yield best result according to our\ncharacterization. Our experiments show that the proposed method significantly\noutperforms existing approaches in image classification experiments by a large\nmargin." 
+ pub_date: { + seconds: 1501545600 + } + authors: "Ozan Sener" + authors: "Silvio Savarese" + repositories: { + url: "https://github.com/razvancaramalau/Visual-Transformer-for-Task-aware-Active-Learning" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 + } + repositories: { + url: "https://github.com/razvancaramalau/Sequential-GCN-for-Active-Learning" + framework: FRAMEWORK_PYTORCH + number_of_stars: 22 + } + repositories: { + url: "https://github.com/rpinsler/active-bayesian-coresets" + framework: FRAMEWORK_PYTORCH + number_of_stars: 25 + } + methods: { + name: "Coresets" + full_name: "Coresets" + } + } + video: { + video_id: "3ROQis3hxPs" + video_title: "PR-119: Active Learning For Convolutional Neural Networks: A Core-Set Approach" + number_of_likes: 22 + number_of_views: 1891 + published_date: { + seconds: 1543402308 + } + uploader: "정지원" + } + } +} +pr_id_to_video: { + key: 120 + value: { + pr_id: 120 + papers: { + paper_id: "shufflenet-v2-practical-guidelines-for" + title: "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + arxiv_id: "1807.11164" + abstract: "Datasets, Transforms and Models specific to Computer Vision" + pub_date: { + seconds: 1532908800 + } + authors: "Ningning Ma" + authors: "Xiangyu Zhang" + authors: "Hai-Tao Zheng" + authors: "Jian Sun" + repositories: { + url: "https://github.com/pytorch/vision" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9309 + description: "Datasets, Transforms and Models specific to Computer Vision" + } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleSeg" + framework: FRAMEWORK_OTHERS + number_of_stars: 1482 + description: "End-to-end image segmentation kit based on PaddlePaddle. " + } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleClas" + framework: FRAMEWORK_OTHERS + number_of_stars: 1547 + description: "A treasure chest for visual recognition powered by PaddlePaddle" + } + repositories: { + url: "https://github.com/allen108108/Model-Optimizer_Implementation" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Training different model on MNIST datadet to see their performance" + } + repositories: { + url: "https://github.com/zjZSTU/LightWeightCNN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "轻量化卷积神经网络实现(SqueezeNet/MobileNet/ShuffleNet/MnasNet)" + } + repositories: { + url: "https://github.com/ba-san/MobilePose-Pi" + framework: FRAMEWORK_PYTORCH + number_of_stars: 15 + description: "MobilePose deployment for Raspberry Pi" + } + repositories: { + url: "https://github.com/forcefulowl/image_classification" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/allenai/dnw" + framework: FRAMEWORK_PYTORCH + number_of_stars: 132 + description: "Discovering Neural Wirings (https://arxiv.org/abs/1906.00586)" + } + repositories: { + url: "https://github.com/mnicnc404/CartoonGan-tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 650 + description: "Generate your own cartoon-style images with CartoonGAN (CVPR 2018), powered by TensorFlow 2.0 Alpha." + } + repositories: { + url: "https://github.com/xggIoU/centernet_tensorflow_wilderface_voc" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 103 + description: "This is the unofficial implementation of the \"CenterNet:Objects as Points\".Just a simple try with self-modified shufflenetv2 and yolov3.If you want better results, you need more experiments." 
+ } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Channel Shuffle" + full_name: "Channel Shuffle" + description: "**Channel Shuffle** is an operation to help information flow across feature channels in convolutional neural networks. It was used as part of the [ShuffleNet](https://paperswithcode.com/method/shufflenet) architecture. \r\n\r\nIf we allow a group convolution to obtain input data from different groups, the input and output channels will be fully related. Specifically, for the feature map generated from the previous group layer, we can first divide the channels in each group into several subgroups, then feed each group in the next layer with different subgroups. \r\n\r\nThe above can be efficiently and elegantly implemented by a channel shuffle operation: suppose a convolutional layer with $g$ groups whose output has $g \\times n$ channels; we first reshape the output channel dimension into $\\left(g, n\\right)$, transposing and then flattening it back as the input of next layer. 
Channel shuffle is also differentiable, which means it can be embedded into network structures for end-to-end training." + } + methods: { + name: "Depthwise Convolution" + full_name: "Depthwise Convolution" + description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + methods: { + name: "Squeeze-and-Excitation Block" + full_name: "Squeeze-and-Excitation Block" + description: "The **Squeeze-and-Excitation Block** is an architectural unit designed to improve the representational power of a network by enabling it to perform dynamic channel-wise feature recalibration. The process is:\r\n\r\n- The block has a convolutional block as an input.\r\n- Each channel is \"squeezed\" into a single numeric value using average pooling.\r\n- A dense layer followed by a ReLU adds non-linearity and output channel complexity is reduced by a ratio.\r\n- Another dense layer followed by a sigmoid gives each channel a smooth gating function.\r\n- Finally, we weight each feature map of the convolutional block based on the side network; the \"excitation\"." + } + methods: { + name: "ShuffleNet v2" + full_name: "ShuffleNet v2" + description: "**ShuffleNet v2** is a convolutional neural network optimized for a direct metric (speed) rather than indirect metrics like FLOPs. It builds upon [ShuffleNet v1](https://paperswithcode.com/method/shufflenet), which utilised pointwise group convolutions, bottleneck-like structures, and a channel shuffle operation. 
Differences are shown in the Figure to the right, including a new channel split operation and moving the channel shuffle operation further down the block." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + } + video: { + video_id: "lrU6uXiJ_9Y" + video_title: "PR-120: ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + number_of_likes: 34 + number_of_views: 2887 + published_date: { + seconds: 1542552935 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 121 + value: { + pr_id: 121 + papers: { + paper_id: "bert-pre-training-of-deep-bidirectional" + title: "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding" + arxiv_id: "1810.04805" + abstract: "We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models, BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5% (7.7% point absolute improvement), MultiNLI accuracy to 86.7% (4.6% absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement)." + pub_date: { + seconds: 1539216000 + } + authors: "Jacob Devlin" + authors: "Ming-Wei Chang" + authors: "Kenton Lee" + authors: "Kristina Toutanova" + repositories: { + url: "https://github.com/airsplay/vimpac" + framework: FRAMEWORK_PYTORCH + number_of_stars: 21 + } + repositories: { + url: "https://github.com/han-shi/SparseBERT" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + } + repositories: { + url: "https://github.com/NoraH2004/adv-absa" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/SindhuMadi/FakeNewsDetection" + framework: FRAMEWORK_OTHERS + description: "BERT and RoBERTa" + } + repositories: { + url: "https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/transformers/mlm" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3067 + description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + } + repositories: { + url: "https://github.com/JA-Bar/nlp-depression" + framework: FRAMEWORK_PYTORCH + description: "NLP course project. Tool to potentially identify signs of depression from text and audio." 
+ } + repositories: { + url: "https://github.com/andi611/Mockingjay-Speech-Representation" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7 + description: "Official Implementation of Mockingjay in Pytorch" + } + repositories: { + url: "https://github.com/huggingface/transformers" + framework: FRAMEWORK_PYTORCH + number_of_stars: 47629 + description: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." + } + repositories: { + url: "https://github.com/yoshitomo-matsubara/torchdistill" + framework: FRAMEWORK_PYTORCH + number_of_stars: 311 + description: "PyTorch-based modular, configuration-driven framework for knowledge distillation. 🏆18 methods presented at CVPR, ICLR, ECCV, NeurIPS, ICCV, etc are implemented so far. 🎁 Trained models, training logs and configurations are available for ensuring the reproducibiliy." + } + repositories: { + url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/language_models/bidirectional_encoder_representations_from_transformers" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "ai codebase" + } + methods: { + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. 
Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + methods: { + name: "WordPiece" + full_name: "WordPiece" + description: "**WordPiece** is a subword segmentation algorithm used in natural language processing. The vocabulary is initialized with individual characters in the language, then the most frequent combinations of symbols in the vocabulary are iteratively added to the vocabulary. The process is:\r\n\r\n1. Initialize the word unit inventory with all the characters in the text.\r\n2. Build a language model on the training data using the inventory from 1.\r\n3. Generate a new word unit by combining two units out of the current word inventory to increment the word unit inventory by one. Choose the new word unit out of all the possible ones that increases the likelihood on the training data the most when added to the model.\r\n4. 
Goto 2 until a predefined limit of word units is reached or the likelihood increase falls below a certain threshold.\r\n\r\nText: [Source](https://stackoverflow.com/questions/55382596/how-is-wordpiece-tokenization-helpful-to-effectively-deal-with-rare-words-proble/55416944#55416944)\r\n\r\nImage: WordPiece as used in BERT" + } + methods: { + name: "Attention Dropout" + full_name: "Attention Dropout" + description: "**Attention Dropout** is a type of dropout used in attention-based architectures, where elements are randomly dropped out of the softmax in the attention equation. For example, for scaled-dot product attention, we would drop elements from the first term:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}\\left(\\frac{QK^{T}}{\\sqrt{d_k}}\\right)V $$" + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "GELU" + full_name: "Gaussian Error Linear Units" + description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." + } + } + video: { + video_id: "GK4IO3qOnLc" + video_title: "PR-121: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding" + number_of_likes: 41 + number_of_views: 2830 + published_date: { + seconds: 1543981172 + } + uploader: "Young Seok Kim" + } + } +} +pr_id_to_video: { + key: 122 + value: { + pr_id: 122 + papers: { + paper_id: "can-creative-adversarial-networks-generating" + title: "CAN: Creative Adversarial Networks, Generating \"Art\" by Learning About Styles and Deviating from Style Norms" + arxiv_id: "1706.07068" + abstract: "We propose a new system for generating art. 
The system generates art by\nlooking at art and learning about style; and becomes creative by increasing the\narousal potential of the generated art by deviating from the learned styles. We\nbuild over Generative Adversarial Networks (GAN), which have shown the ability\nto learn to generate novel images simulating a given distribution. We argue\nthat such networks are limited in their ability to generate creative products\nin their original design. We propose modifications to its objective to make it\ncapable of generating creative art by maximizing deviation from established\nstyles and minimizing deviation from art distribution. We conducted experiments\nto compare the response of human subjects to the generated art with their\nresponse to art created by artists. The results show that human subjects could\nnot distinguish art generated by the proposed system from art generated by\ncontemporary artists and shown in top art fairs. Human subjects even rated the\ngenerated images higher on various scales." + pub_date: { + seconds: 1498003200 + } + authors: "Ahmed Elgammal" + authors: "Bingchen Liu" + authors: "Mohamed Elhoseiny" + authors: "Marian Mazzone" + repositories: { + url: "https://github.com/otepencelik/GAN-Artwork-Generation" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + } + repositories: { + url: "https://github.com/naotokui/CreativeGAN-Rhythm" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 19 + description: "Creative Adversarial Network for generating Dance Music Rhythm Patterns" + } + repositories: { + url: "https://github.com/dylanell/conditional-wgan" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Implementation of Conditional Wasserstein Generative Adversarial Network (GAN) in PyTorch" + } + repositories: { + url: "https://github.com/mlberkeley/Creative-Adversarial-Networks" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 199 + description: "(WIP) Implementation of Creative Adversarial Networks https://arxiv.org/pdf/1706.07068.pdf" + } + repositories: { + url: "https://github.com/sfc-computational-creativity-lab/x-rhythm-can" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Creative Adversarial Network for generating Dance Music Rhythm Patterns" + } + repositories: { + url: "https://github.com/VladAleshin/pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "GAN (pet project on pytorch and flask)" + } + repositories: { + url: "https://github.com/AndreasWieg/Creative-GAN" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: " Art-GAN" + } + repositories: { + url: "https://github.com/zawlinnnaing/CAN-thesis" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } + repositories: { + url: "https://github.com/casey-barr/generative-models-in-tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "a collection of generative adversarial networks implemented in TensorFlow" + } + repositories: { + url: "https://github.com/previtus/cci_exploring_machine_intelligence" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 17 + description: "UAL, CCI - MSc course: 19/20 \"IU000128 Coding Three: Exploring Machine Intelligence\"" + } + } + video: { + video_id: "TB7izZIWYyw" + video_title: "PR-122: CAN: Creative Adversarial Networks" + number_of_likes: 13 + number_of_views: 1127 + published_date: { + seconds: 1543554496 + } + uploader: "visionNoob" + } + } +} +pr_id_to_video: { + key: 
123 + value: { + pr_id: 123 + papers: { + paper_id: "partial-convolution-based-padding" + title: "Partial Convolution based Padding" + arxiv_id: "1811.11718" + abstract: "In this paper, we present a simple yet effective padding scheme that can be\nused as a drop-in module for existing convolutional neural networks. We call it\npartial convolution based padding, with the intuition that the padded region\ncan be treated as holes and the original input as non-holes. Specifically,\nduring the convolution operation, the convolution results are re-weighted near\nimage borders based on the ratios between the padded area and the convolution\nsliding window area. Extensive experiments with various deep network models on\nImageNet classification and semantic segmentation demonstrate that the proposed\npadding scheme consistently outperforms standard zero padding with better\naccuracy." + pub_date: { + seconds: 1543363200 + } + authors: "Guilin Liu" + authors: "Kevin J. Shih" + authors: "Ting-Chun Wang" + authors: "Fitsum A. Reda" + authors: "Karan Sapra" + authors: "Zhiding Yu" + authors: "Andrew Tao" + authors: "Bryan Catanzaro" + repositories: { + url: "https://github.com/feixuetuba/inpating" + framework: FRAMEWORK_PYTORCH + description: "Reimplementation of Image Inpainting for Irregular Holes Using Partial Convolutions" + } + repositories: { + is_official: true + url: "https://github.com/NVIDIA/partialconv" + framework: FRAMEWORK_PYTORCH + number_of_stars: 937 + description: "A New Padding Scheme: Partial Convolution based Padding" + } + repositories: { + url: "https://github.com/lessw2020/auto-adaptive-ai" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "auto adaptive framework for intrinsic hyperparameter selection, adaptive padding, normalized weights" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + } + video: { + video_id: "IKHzc7sGCxQ" + video_title: "PR-123: Partial Convolution based Padding" + number_of_likes: 50 + number_of_views: 2265 + published_date: { + seconds: 1544173387 + } + uploader: "Soyeon Kim" + } + } +} +pr_id_to_video: { + key: 124 + value: { + pr_id: 124 + papers: { + paper_id: "training-stacked-denoising-autoencoders-for" + title: "Training Stacked Denoising Autoencoders for Representation Learning" + arxiv_id: "2102.08012" + abstract: "We implement stacked denoising autoencoders, a class of neural networks that are capable of learning powerful representations of high dimensional data. We describe stochastic gradient descent for unsupervised training of autoencoders, as well as a novel genetic algorithm based approach that makes use of gradient information. We analyze the performance of both optimization algorithms and also the representation learning ability of the autoencoder when it is trained on standard image classification datasets." 
+ pub_date: { + seconds: 1613433600 + } + authors: "Jason Liang" + authors: "Keith Kelly" + methods: { + name: "AutoEncoder" + full_name: "AutoEncoder" + description: "An **Autoencoder** is a bottleneck architecture that turns a high-dimensional input into a latent low-dimensional code (encoder), and then performs a reconstruction of the input with this latent code (the decoder).\r\n\r\nImage: [Michael Massi](https://en.wikipedia.org/wiki/Autoencoder#/media/File:Autoencoder_schema.png)" + } + } + video: { + video_id: "saJcr74ldvs" + } + } +} +pr_id_to_video: { + key: 125 + value: { + pr_id: 125 + papers: { + paper_id: "energy-based-generative-adversarial-network" + title: "Energy-based Generative Adversarial Network" + arxiv_id: "1609.03126" + abstract: "We introduce the \"Energy-based Generative Adversarial Network\" model (EBGAN)\nwhich views the discriminator as an energy function that attributes low\nenergies to the regions near the data manifold and higher energies to other\nregions. Similar to the probabilistic GANs, a generator is seen as being\ntrained to produce contrastive samples with minimal energies, while the\ndiscriminator is trained to assign high energies to these generated samples.\nViewing the discriminator as an energy function allows to use a wide variety of\narchitectures and loss functionals in addition to the usual binary classifier\nwith logistic output. Among them, we show one instantiation of EBGAN framework\nas using an auto-encoder architecture, with the energy being the reconstruction\nerror, in place of the discriminator. We show that this form of EBGAN exhibits\nmore stable behavior than regular GANs during training. We also show that a\nsingle-scale architecture can be trained to generate high-resolution images." + pub_date: { + seconds: 1473552000 + } + authors: "Junbo Zhao" + authors: "Michael Mathieu" + authors: "Yann LeCun" + repositories: { + url: "https://github.com/buriburisuri/ebgan" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 207 + description: "A tensorflow implementation of Junbo et al's Energy-based generative adversarial network ( EBGAN ) paper. " + } + repositories: { + url: "https://github.com/eriklindernoren/PyTorch-GAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9588 + description: "PyTorch implementations of Generative Adversarial Networks." + } + repositories: { + url: "https://github.com/evan11401/CS_IOC5008_0856043_HW2" + framework: FRAMEWORK_PYTORCH + description: "NCTU VRDL HW2 | Use BEGAN to create human's face" + } + } + video: { + video_id: "8PoewOpK6b4" + video_title: "PR-125: ENERGY-BASED GENERATIVE ADVERSARIAL NETWORKS" + number_of_likes: 7 + number_of_views: 708 + published_date: { + seconds: 1544368518 + } + uploader: "MinGuk Kang" + } + } +} +pr_id_to_video: { + key: 126 + value: { + pr_id: 126 + papers: { + paper_id: "densepose-dense-human-pose-estimation-in-the" + title: "DensePose: Dense Human Pose Estimation In The Wild" + arxiv_id: "1802.00434" + abstract: "In this work, we establish dense correspondences between RGB image and a\nsurface-based representation of the human body, a task we refer to as dense\nhuman pose estimation. We first gather dense correspondences for 50K persons\nappearing in the COCO dataset by introducing an efficient annotation pipeline.\nWe then use our dataset to train CNN-based systems that deliver dense\ncorrespondence 'in the wild', namely in the presence of background, occlusions\nand scale variations. 
We improve our training set's effectiveness by training\nan 'inpainting' network that can fill in missing groundtruth values and report\nclear improvements with respect to the best results that would be achievable in\nthe past. We experiment with fully-convolutional networks and region-based\nmodels and observe a superiority of the latter; we further improve accuracy\nthrough cascading, obtaining a system that delivers highly-accurate results in\nreal time. Supplementary materials and videos are provided on the project page\nhttp://densepose.org" + pub_date: { + seconds: 1517443200 + } + authors: "Rıza Alp Güler" + authors: "Natalia Neverova" + authors: "Iasonas Kokkinos" + repositories: { + url: "https://github.com/ubc-vision/DwNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 19 + } + repositories: { + url: "https://github.com/hz-ants/DensePose" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/yongsheng268/DensePose" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/jiajunhua/facebookresearch-DensePose" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/sgoldyaev/DeepFashion.ADGAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + } + repositories: { + url: "https://github.com/chengjiali/DensePose3" + framework: FRAMEWORK_OTHERS + description: "Adapt FB's DensePose for Python3" + } + repositories: { + url: "https://github.com/facebookresearch/DensePose" + framework: FRAMEWORK_OTHERS + number_of_stars: 6031 + description: "A real-time approach for mapping all human pixels of 2D RGB images to a 3D surface-based model of the body" + } + repositories: { + url: "https://github.com/ARMUGHAN-SHAHID/MoboDensepose" + framework: FRAMEWORK_OTHERS + description: "DEnse" + } + repositories: { + url: "https://github.com/facebookresearch/detectron" + framework: FRAMEWORK_PYTORCH + number_of_stars: 24458 + description: "FAIR's research platform for object detection research, implementing popular algorithms like Mask R-CNN and RetinaNet." + } + repositories: { + url: "https://github.com/svikramank/DensePose" + framework: FRAMEWORK_OTHERS + number_of_stars: 5 + description: "In this repo, I tried replicating the famous Facebook's DensePose R-CNN model and tried to visualize the collected DensePose-COCO dataset and show the correspondences to the SMPL model." + } + } + video: { + video_id: "-bvMCbk_FT8" + video_title: "PR-126: DensePose: Dense Human Pose Estimation In The Wild" + number_of_views: 1783 + published_date: { + seconds: 1544365241 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 127 + value: { + pr_id: 127 + papers: { + paper_id: "facenet-a-unified-embedding-for-face" + title: "FaceNet: A Unified Embedding for Face Recognition and Clustering" + arxiv_id: "1503.03832" + abstract: "Despite significant recent advances in the field of face recognition,\nimplementing face verification and recognition efficiently at scale presents\nserious challenges to current approaches. In this paper we present a system,\ncalled FaceNet, that directly learns a mapping from face images to a compact\nEuclidean space where distances directly correspond to a measure of face\nsimilarity. 
Once this space has been produced, tasks such as face recognition,\nverification and clustering can be easily implemented using standard techniques\nwith FaceNet embeddings as feature vectors.\n Our method uses a deep convolutional network trained to directly optimize the\nembedding itself, rather than an intermediate bottleneck layer as in previous\ndeep learning approaches. To train, we use triplets of roughly aligned matching\n/ non-matching face patches generated using a novel online triplet mining\nmethod. The benefit of our approach is much greater representational\nefficiency: we achieve state-of-the-art face recognition performance using only\n128-bytes per face.\n On the widely used Labeled Faces in the Wild (LFW) dataset, our system\nachieves a new record accuracy of 99.63%. On YouTube Faces DB it achieves\n95.12%. Our system cuts the error rate in comparison to the best published\nresult by 30% on both datasets.\n We also introduce the concept of harmonic embeddings, and a harmonic triplet\nloss, which describe different versions of face embeddings (produced by\ndifferent networks) that are compatible to each other and allow for direct\ncomparison between each other." + pub_date: { + seconds: 1426118400 + } + authors: "Florian Schroff" + authors: "Dmitry Kalenichenko" + authors: "James Philbin" + repositories: { + url: "https://github.com/sdamolini/LooksLikeWho" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "VGGFace2 Facial Recognition using Quadruplet Loss and 4 CNNs." + } + repositories: { + url: "https://github.com/shi510/ffem" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "Face Feature Embedding Module" + } + repositories: { + url: "https://github.com/obj2vec/obj2vec" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/akshayraghavan21/Face_Recognition_Using_Facenet" + framework: FRAMEWORK_TENSORFLOW + description: "A simple face recognition implementation using a pre-trained, one-shot learning model - FaceNet. Classification on custom dataset by using the WebCam to perform live face recognition." + } + repositories: { + url: "https://github.com/tamerthamoqa/facenet-pytorch-glint360k" + framework: FRAMEWORK_PYTORCH + number_of_stars: 115 + description: "A PyTorch implementation of the 'FaceNet' paper for training a facial recognition model with Triplet Loss using the glint360k dataset. A pre-trained model using Triplet Loss is available for download." 
+ } + repositories: { + url: "https://github.com/Atcold/torch-TripletEmbedding" + framework: FRAMEWORK_OTHERS + number_of_stars: 157 + description: "TripletLoss used in Google's FaceNet paper" + } + repositories: { + url: "https://github.com/serengil/deepface" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1696 + description: "A Lightweight Deep Face Recognition and Facial Attribute Analysis (Age, Gender, Emotion and Race) Framework for Python" + } + repositories: { + url: "https://github.com/PushpakBhoge/Face_Recognition_TF" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "A project to Recognise faces in photos and videos or in realtime" + } + repositories: { + url: "https://github.com/Mrzhang3389/FaceRecognition" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "Face detection (MTCNN) + face encoding (FaceNet) = face recognition (FaceRecognition); DockerFile + Docker Image = container deployment" + } + repositories: { + url: "https://github.com/QuocThangNguyen/deep-metric-learning-tsinghua-dogs" + framework: FRAMEWORK_PYTORCH + number_of_stars: 25 + description: "Dogs classification with Deep Metric Learning" + } + } + video: { + video_id: "0k3X-9y_9S8" + video_title: "PR-127: FaceNet" + number_of_likes: 58 + number_of_views: 4050 + published_date: { + seconds: 1544971153 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 128 + value: { + pr_id: 128 + papers: { + paper_id: "timbretron-a-wavenetcyclegancqtaudio-pipeline" + title: "TimbreTron: A WaveNet(CycleGAN(CQT(Audio))) Pipeline for Musical Timbre Transfer" + arxiv_id: "1811.09620" + abstract: "In this work, we address the problem of musical timbre transfer, where the\ngoal is to manipulate the timbre of a sound sample from one instrument to match\nanother instrument while preserving other musical content, such as pitch,\nrhythm, and loudness. In principle, one could apply image-based style transfer\ntechniques to a time-frequency representation of an audio signal, but this\ndepends on having a representation that allows independent manipulation of\ntimbre as well as high-quality waveform generation. We introduce TimbreTron, a\nmethod for musical timbre transfer which applies \"image\" domain style transfer\nto a time-frequency representation of the audio signal, and then produces a\nhigh-quality waveform using a conditional WaveNet synthesizer. We show that the\nConstant Q Transform (CQT) representation is particularly well-suited to\nconvolutional architectures due to its approximate pitch equivariance. Based on\nhuman perceptual evaluations, we confirmed that TimbreTron recognizably\ntransferred the timbre while otherwise preserving the musical content, for both\nmonophonic and polyphonic samples." + pub_date: { + seconds: 1542844800 + } + authors: "Sicong Huang" + authors: "Qiyang Li" + authors: "Cem Anil" + authors: "Xuchan Bao" + authors: "Sageev Oore" + authors: "Roger B. 
Grosse" + repositories: { + url: "https://github.com/edivadiranatnom/Machine-Learning-Project" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "Machine Learning course project" + } + repositories: { + is_official: true + url: "https://github.com/huangsicong/TimbreTron" + framework: FRAMEWORK_OTHERS + number_of_stars: 37 + description: "The repo accompanying the paper: TimbreTron: A WaveNet(CycleGAN(CQT(Audio))) Pipeline for Musical Timbre Transfer" + } + methods: { + name: "WaveNet" + full_name: "WaveNet" + description: "**WaveNet** is an audio generative model based on the [PixelCNN](https://paperswithcode.com/method/pixelcnn) architecture. In order to deal with long-range temporal dependencies needed for raw audio generation, architectures are developed based on dilated causal convolutions, which exhibit very large receptive fields.\r\n\r\nThe joint probability of a waveform $\\vec{x} = \\{ x_1, \\dots, x_T \\}$ is factorised as a product of conditional probabilities as follows:\r\n\r\n$$p\\left(\\vec{x}\\right) = \\prod_{t=1}^{T} p\\left(x_t \\mid x_1, \\dots ,x_{t-1}\\right)$$\r\n\r\nEach audio sample $x_t$ is therefore conditioned on the samples at all previous timesteps." + } + methods: { + name: "Dilated Causal Convolution" + full_name: "Dilated Causal Convolution" + description: "A **Dilated Causal Convolution** is a causal convolution where the filter is applied over an area larger than its length by skipping input values with a certain step. A dilated causal convolution effectively allows the network to have very large receptive fields with just a few layers." + } + methods: { + name: "Mixture of Logistic Distributions" + full_name: "Mixture of Logistic Distributions" + description: "**Mixture of Logistic Distributions (MoL)** is a type of output function, and an alternative to a [softmax](https://paperswithcode.com/method/softmax) layer. Discretized logistic mixture likelihood is used in PixelCNN++ and [WaveNet](https://paperswithcode.com/method/wavenet) to predict discrete values.\r\n\r\nImage Credit: [Hao Gao](https://medium.com/@smallfishbigsea/an-explanation-of-discretized-logistic-mixture-likelihood-bdfe531751f0)" + } + } + video: { + video_id: "5eofa6SksKU" + video_title: "PR-128: TimbreTron: A Wavenet(CycleGAN(CQT(Audio))) pipeline for musical timbre transfer" + number_of_likes: 6 + number_of_views: 549 + published_date: { + seconds: 1544973323 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 129 + value: { + pr_id: 129 + papers: { + paper_id: "horovod-fast-and-easy-distributed-deep" + title: "Horovod: fast and easy distributed deep learning in TensorFlow" + arxiv_id: "1802.05799" + abstract: "Training modern deep learning models requires large amounts of computation,\noften provided by GPUs. Scaling computation from one GPU to many can enable\nmuch faster training and research progress but entails two complications.\nFirst, the training library must support inter-GPU communication. Depending on\nthe particular methods employed, this communication may entail anywhere from\nnegligible to significant overhead. Second, the user must modify his or her\ntraining code to take advantage of inter-GPU communication. 
Depending on the\ntraining library's API, the modification required may be either significant or\nminimal.\n Existing methods for enabling multi-GPU training under the TensorFlow library\nentail non-negligible communication overhead and require users to heavily\nmodify their model-building code, leading many researchers to avoid the whole\nmess and stick with slower single-GPU training. In this paper we introduce\nHorovod, an open source library that improves on both obstructions to scaling:\nit employs efficient inter-GPU communication via ring reduction and requires\nonly a few lines of modification to user code, enabling faster, easier\ndistributed training in TensorFlow. Horovod is available under the Apache 2.0\nlicense at https://github.com/uber/horovod" + pub_date: { + seconds: 1518652800 + } + authors: "Alexander Sergeev" + authors: "Mike Del Balso" + repositories: { + url: "https://github.com/hcyang99/horovod" + framework: FRAMEWORK_TENSORFLOW + description: "Modify horovod/horovod to support nic switching" + } + repositories: { + url: "https://github.com/gridgentoo/UberHorovod" + framework: FRAMEWORK_TENSORFLOW + description: "Reverse engineering of the UberHorovod architecture, Distributed training framework for TensorFlow, Keras, PyTorch, and Apache MXNet." + } + repositories: { + url: "https://github.com/teja5832/horovod-elastic" + framework: FRAMEWORK_TENSORFLOW + description: "Adding Transparent Gradient Aggregation to Horovod." + } + repositories: { + is_official: true + url: "https://github.com/uber/horovod" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 11345 + description: "Distributed training framework for TensorFlow, Keras, PyTorch, and Apache MXNet." + } + repositories: { + url: "https://github.com/horovod/horovod" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 11346 + description: "Distributed training framework for TensorFlow, Keras, PyTorch, and Apache MXNet." + } + repositories: { + url: "https://github.com/markWJJ/horovod" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/ctcyang/horovod" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Distributed training framework for TensorFlow, Keras, and PyTorch." + } + repositories: { + url: "https://github.com/karakusc/horovod" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Distributed training framework for TensorFlow, Keras, and PyTorch." + } + repositories: { + url: "https://github.com/zhonghual1206/horvodval" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "use gan for val" + } + repositories: { + url: "https://github.com/axbaretto/horovod" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Distributed training framework for TensorFlow, Keras, PyTorch, and MXNet. " + } + } + video: { + video_id: "8zQECRiONAo" + video_title: "PR-129: Horovod: fast and easy distributed deep learning in TensorFlow" + number_of_likes: 9 + number_of_views: 715 + published_date: { + seconds: 1546077765 + } + uploader: "Taekmin Kim" + } + } +} +pr_id_to_video: { + key: 130 + value: { + pr_id: 130 + papers: { + paper_id: "generative-adversarial-imitation-learning" + title: "Generative Adversarial Imitation Learning" + arxiv_id: "1606.03476" + abstract: "Consider learning a policy from example expert behavior, without interaction\nwith the expert or access to reinforcement signal. 
One approach is to recover\nthe expert's cost function with inverse reinforcement learning, then extract a\npolicy from that cost function with reinforcement learning. This approach is\nindirect and can be slow. We propose a new general framework for directly\nextracting a policy from data, as if it were obtained by reinforcement learning\nfollowing inverse reinforcement learning. We show that a certain instantiation\nof our framework draws an analogy between imitation learning and generative\nadversarial networks, from which we derive a model-free imitation learning\nalgorithm that obtains significant performance gains over existing model-free\nmethods in imitating complex behaviors in large, high-dimensional environments." + pub_date: { + seconds: 1465516800 + } + authors: "Jonathan Ho" + authors: "Stefano Ermon" + repositories: { + url: "https://github.com/morikatron/GAIL_PPO" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Generative Adversarial Imitation Learning" + } + repositories: { + url: "https://github.com/HumanCompatibleAI/deep-rlsp" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 23 + description: "Code accompanying \"Learning What To Do by Simulating the Past\", ICLR 2021." + } + repositories: { + url: "https://github.com/HumanCompatibleAI/imitation" + framework: FRAMEWORK_PYTORCH + number_of_stars: 253 + description: "Clean PyTorch implementations of imitation learning algorithms" + } + repositories: { + url: "https://github.com/Khrylx/PyTorch-RL" + framework: FRAMEWORK_PYTORCH + number_of_stars: 699 + description: "PyTorch implementation of Deep Reinforcement Learning: Policy Gradient methods (TRPO, PPO, A2C) and Generative Adversarial Imitation Learning (GAIL). Fast Fisher vector product TRPO." + } + repositories: { + url: "https://github.com/sisl/ngsim_env" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 99 + description: "Learning human driver models from NGSIM data with imitation learning." + } + repositories: { + url: "https://github.com/170928/-Review-Generative-Adversarial-Imitation-Learning" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "[Review & Code]" + } + repositories: { + url: "https://github.com/bukysun/gail-traj-eb" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/nav74neet/gail-tf-gym" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 55 + description: "Implementation of Generatve Adversarial Imitation Learning (GAIL) for classic environments from OpenAI Gym. " + } + repositories: { + url: "https://github.com/hill-a/stable-baselines" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3177 + description: "A fork of OpenAI Baselines, implementations of reinforcement learning algorithms" + } + repositories: { + url: "https://github.com/KAIST-AILab/deeprl_practice_colab" + framework: FRAMEWORK_OTHERS + number_of_stars: 4 + description: "Preparation for Deep Reinforcement Learning using Google Colab" + } + methods: { + name: "GAIL" + full_name: "Generative Adversarial Imitation Learning" + description: "**Generative Adversarial Imitation Learning** presents a new general framework for directly extracting a policy from data, as if it were obtained by reinforcement learning following inverse reinforcement learning." 
+ } + } + video: { + video_id: "XHmRsgFrCTM" + video_title: "PR-130: Generative Adversarial Imitation Learning" + number_of_likes: 14 + number_of_views: 2561 + published_date: { + seconds: 1545573404 + } + uploader: "Jinsung Yoon" + } + } +} +pr_id_to_video: { + key: 131 + value: { + pr_id: 131 + papers: { + paper_id: "a-style-based-generator-architecture-for" + title: "A Style-Based Generator Architecture for Generative Adversarial Networks" + arxiv_id: "1812.04948" + abstract: "We propose an alternative generator architecture for generative adversarial\nnetworks, borrowing from style transfer literature. The new architecture leads\nto an automatically learned, unsupervised separation of high-level attributes\n(e.g., pose and identity when trained on human faces) and stochastic variation\nin the generated images (e.g., freckles, hair), and it enables intuitive,\nscale-specific control of the synthesis. The new generator improves the\nstate-of-the-art in terms of traditional distribution quality metrics, leads to\ndemonstrably better interpolation properties, and also better disentangles the\nlatent factors of variation. To quantify interpolation quality and\ndisentanglement, we propose two new, automated methods that are applicable to\nany generator architecture. Finally, we introduce a new, highly varied and\nhigh-quality dataset of human faces." + pub_date: { + seconds: 1544572800 + } + authors: "Tero Karras" + authors: "Samuli Laine" + authors: "Timo Aila" + repositories: { + url: "https://github.com/comp-imaging-sci/pic-recon" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "Code associated with the paper \"Prior Image-Constrained Reconstruction using Style-Based Generative Models\" accepted to ICML 2021." + } + repositories: { + url: "https://github.com/toshas/torch-fidelity" + framework: FRAMEWORK_PYTORCH + number_of_stars: 247 + description: "High-fidelity performance metrics for generative models in PyTorch" + } + repositories: { + url: "https://github.com/roberttwomey/machine-imagination-workshop" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "text to image notebook with CLIP for workshop on Machine Imagination, Spring 2021" + } + repositories: { + url: "https://github.com/ariel415el/SimplePytorch-ALAE" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "Implementation of Adverserial autoencoders" + } + repositories: { + url: "https://github.com/jhKessler/Progressively-Growing-Generative-Adverserial-Network" + framework: FRAMEWORK_PYTORCH + description: "Generative Adverserial Network for Image Generation" + } + repositories: { + url: "https://github.com/genforce/interfacegan" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 950 + description: "[CVPR 2020] Interpreting the Latent Space of GANs for Semantic Face Editing" + } + repositories: { + url: "https://github.com/a514514772/hijackgan" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 10 + description: "[CVPR 2021] Pytorch implementation of Hijack-GAN: Unintended-Use of Pretrained, Black-Box GANs" + } + repositories: { + url: "https://github.com/yaxingwang/DeepI2I" + framework: FRAMEWORK_PYTORCH + number_of_stars: 18 + description: "Image-to-image translation, knowledge transfer" + } + repositories: { + url: "https://github.com/ariel415el/ALAE" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "Implementation of Adverserial autoencoders" + } + repositories: { + url: 
"https://github.com/ayushgupta9198/stylegan" + framework: FRAMEWORK_TENSORFLOW + description: "The model is based for fake person creation based on stylegan technique. I have trained the model on my data set and generates the result basis of grids and seeds" + } + methods: { + name: "Feedforward Network" + full_name: "Feedforward Network" + description: "A **Feedforward Network**, or a **Multilayer Perceptron (MLP)**, is a neural network with solely densely connected layers. This is the classic neural network architecture of the literature. It consists of inputs $x$ passed through units $h$ (of which there can be many layers) to predict a target $y$. Activation functions are generally chosen to be non-linear to allow for flexible functional approximation.\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "Leaky ReLU" + full_name: "Leaky ReLU" + description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we we may suffer from sparse gradients, for example training generative adversarial networks." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Adaptive Instance Normalization" + full_name: "Adaptive Instance Normalization" + description: "**Adaptive Instance Normalization** is a normalization method that aligns the mean and variance of the content features with those of the style features. \r\n\r\n[Instance Normalization](https://paperswithcode.com/method/instance-normalization) normalizes the input to a single style specified by the affine parameters. Adaptive Instance Normaliation is an extension. In AdaIN, we receive a content input $x$ and a style input $y$, and we simply align the channel-wise mean and variance of $x$ to match those of $y$. Unlike [Batch Normalization](https://paperswithcode.com/method/batch-normalization), Instance Normalization or Conditional Instance Normalization, AdaIN has no learnable affine parameters. 
Instead, it adaptively computes the affine parameters from the style input:\r\n\r\n$$\r\n\\textrm{AdaIN}(x, y)= \\sigma(y)\\left(\\frac{x-\\mu(x)}{\\sigma(x)}\\right)+\\mu(y)\r\n$$" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "WGAN-GP Loss" + full_name: "WGAN-GP Loss" + description: "**Wasserstein Gradient Penalty Loss**, or **WGAN-GP Loss**, is a loss used for generative adversarial networks that augments the Wasserstein loss with a gradient norm penalty for random samples $\\mathbf{\\hat{x}} \\sim \\mathbb{P}\\_{\\hat{\\mathbf{x}}}$ to achieve Lipschitz continuity:\r\n\r\n$$ L = \\mathbb{E}\\_{\\mathbf{\\hat{x}} \\sim \\mathbb{P}\\_{g}}\\left[D\\left(\\tilde{\\mathbf{x}}\\right)\\right] - \\mathbb{E}\\_{\\mathbf{x} \\sim \\mathbb{P}\\_{r}}\\left[D\\left(\\mathbf{x}\\right)\\right] + \\lambda\\mathbb{E}\\_{\\mathbf{\\hat{x}} \\sim \\mathbb{P}\\_{\\hat{\\mathbf{x}}}}\\left[\\left(||\\nabla\\_{\\tilde{\\mathbf{x}}}D\\left(\\mathbf{\\tilde{x}}\\right)||\\_{2}-1\\right)^{2}\\right]$$\r\n\r\nIt was introduced as part of the [WGAN-GP](https://paperswithcode.com/method/wgan-gp) overall model." + } + methods: { + name: "R1 Regularization" + full_name: "R1 Regularization" + description: "**R$\\_{1}$ Regularization** is a regularization technique and gradient penalty for training [generative adversarial networks](https://paperswithcode.com/methods/category/generative-adversarial-networks). It penalizes the discriminator from deviating from the Nash Equilibrium via penalizing the gradient on real data alone: when the generator distribution produces the true data distribution and the discriminator is equal to 0 on the data manifold, the gradient penalty ensures that the discriminator cannot create a non-zero gradient orthogonal to the data manifold without suffering a loss in the GAN game.\r\n\r\nThis leads to the following regularization term:\r\n\r\n$$ R\\_{1}\\left(\\psi\\right) = \\frac{\\gamma}{2}E\\_{p\\_{D}\\left(x\\right)}\\left[||\\nabla{D\\_{\\psi}\\left(x\\right)}||^{2}\\right] $$" + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. 
\r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "StyleGAN" + full_name: "StyleGAN" + description: "**StyleGAN** is a type of generative adversarial network. It uses an alternative generator architecture for generative adversarial networks, borrowing from style transfer literature; in particular, the use of adaptive instance normalization. Otherwise it follows Progressive GAN in using a progressively growing training regime. Other quirks include the fact it generates from a fixed value tensor not stochastically generated latent variables as in regular GANs. The stochastically generated latent variables are used as style vectors in the adaptive instance normalization at each resolution after being transformed by an 8-layer feedforward network. Lastly, it employs a form of regularization called mixing regularization, which mixes two style latent variables during training." + } + } + video: { + video_id: "TWzEbMrH59o" + video_title: "PR-131: A Style-Based Generator Architecture for Generative Adversarial Networks" + number_of_likes: 64 + number_of_views: 3883 + published_date: { + seconds: 1546903803 + } + uploader: "정지원" + } + } +} +pr_id_to_video: { + key: 132 + value: { + pr_id: 132 + papers: { + paper_id: "ssd-single-shot-multibox-detector" + title: "SSD: Single Shot MultiBox Detector" + arxiv_id: "1512.02325" + abstract: "We present a method for detecting objects in images using a single deep\nneural network. Our approach, named SSD, discretizes the output space of\nbounding boxes into a set of default boxes over different aspect ratios and\nscales per feature map location. At prediction time, the network generates\nscores for the presence of each object category in each default box and\nproduces adjustments to the box to better match the object shape. Additionally,\nthe network combines predictions from multiple feature maps with different\nresolutions to naturally handle objects of various sizes. Our SSD model is\nsimple relative to methods that require object proposals because it completely\neliminates proposal generation and subsequent pixel or feature resampling stage\nand encapsulates all computation in a single network. This makes SSD easy to\ntrain and straightforward to integrate into systems that require a detection\ncomponent. Experimental results on the PASCAL VOC, MS COCO, and ILSVRC datasets\nconfirm that SSD has comparable accuracy to methods that utilize an additional\nobject proposal step and is much faster, while providing a unified framework\nfor both training and inference. Compared to other single stage methods, SSD\nhas much better accuracy, even with a smaller input image size. For $300\\times\n300$ input, SSD achieves 72.1% mAP on VOC2007 test at 58 FPS on a Nvidia Titan\nX and for $500\\times 500$ input, SSD achieves 75.1% mAP, outperforming a\ncomparable state of the art Faster R-CNN model. 
Code is available at\nhttps://github.com/weiliu89/caffe/tree/ssd ." + pub_date: { + seconds: 1449532800 + } + authors: "Wei Liu" + authors: "Dragomir Anguelov" + authors: "Dumitru Erhan" + authors: "Christian Szegedy" + authors: "Scott Reed" + authors: "Cheng-Yang Fu" + authors: "Alexander C. Berg" + repositories: { + url: "https://github.com/huytranvan2010/SSD" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/stevensmiley1989/MrRobot" + framework: FRAMEWORK_TENSORFLOW + description: "This is a robot I designed in Fusion 360 and 3D printed with my FlashForge Creator Pro in PLA, Main Hardware: 1 x Raspberry Pi 3b, 3 x Arduinos with I2C, 5 x ultrasonic sensors, 4 x 60Kg Servos, 4 x 12V 200rpm DC motors, 1 x stepper motor for loading ammo into custom built coil gun. The coil gun uses 2 x 450V 1000uF Capacitors in parallel with a boost converter, yielding 380V maximum charge discharge from a 12V input, firing with a 1.2kV maximum peak non-repetitive surge current 1.1kA rated Thyristor SCR, Main Software: Uses TensorFlow and Python for Object Detection with some C++ for motor controls. The model used is a retrained Single Shot Detection MobileNet V2 algorithm trained on a toy reindeer. Signal processing allows proportional controller feedback to adjust movement of the robot for moving, aiming, and shooting. An application for IOS was written in Swift to control the robot as well, using Mosquito MQTT Broker for communication. " + } + repositories: { + url: "https://github.com/birosjh/pytorch_ssd" + framework: FRAMEWORK_PYTORCH + description: "A project for me to play around and experiment with the different components of the Single Shot Multibox Detector." + } + repositories: { + url: "https://github.com/Chubbyman2/SSD_MobileNet_Hand_Tracker" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "A hand tracker created using OpenCV and a re-trained SSD MobileNet v2 via transfer learning on the EgoHands Dataset." + } + repositories: { + url: "https://github.com/serengil/deepface" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1696 + description: "A Lightweight Deep Face Recognition and Facial Attribute Analysis (Age, Gender, Emotion and Race) Framework for Python" + } + repositories: { + url: "https://github.com/AmirDavoodi/Hand-Gestures-Human-Robot-Interaction" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "This project is the final project of the course Robotics 2019 and we are implementing hand gesture classifier to using it for controlling Mighty Thymio robot which is a differential robot." 
+ } + repositories: { + url: "https://github.com/bleedingfight/caffe-env" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/KostadinovShalon/UAVDetectionTrackingBenchmark" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + } + repositories: { + url: "https://github.com/jaykshirsagar05/CrowdCounting" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/ashwath007/amenity-detection" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "aminity-detection" + } + methods: { + name: "SGD with Momentum" + full_name: "SGD with Momentum" + description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" + } + methods: { + name: "SSD" + full_name: "SSD" + description: "**SSD** is a single-stage object detection method that discretizes the output space of bounding boxes into a set of default boxes over different aspect ratios and scales per feature map location. At prediction time, the network generates scores for the presence of each object category in each default box and produces adjustments to the box to better match the object shape. Additionally, the network combines predictions from multiple feature maps with different resolutions to naturally handle objects of various sizes. \r\n\r\nThe fundamental improvement in speed comes from eliminating bounding box proposals and the subsequent pixel or feature resampling stage. Improvements over competing single-stage methods include using a small convolutional filter to predict object categories and offsets in bounding box locations, using separate predictors (filters) for different aspect ratio detections, and applying these filters to multiple feature maps from the later stages of a network in order to perform detection at multiple scales." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Non Maximum Suppression" + full_name: "Non Maximum Suppression" + description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criteria is usually discarding entities that are below a given probability bound. 
With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where a $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "VGG" + full_name: "VGG" + description: "**VGG** is a classical convolutional neural network architecture. It was based on an analysis of how to increase the depth of such networks. The network utilises small 3 x 3 filters. Otherwise the network is characterized by its simplicity: the only other components being pooling layers and a fully connected layer.\r\n\r\nImage: [Davi Frossard](https://www.cs.toronto.edu/frossard/post/vgg16/)" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. 
Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + } + video: { + video_id: "ej1ISEoAK5g" + video_title: "PR-132: SSD: Single Shot MultiBox Detector" + number_of_likes: 118 + number_of_views: 10404 + published_date: { + seconds: 1546786878 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 133 + value: { + pr_id: 133 + papers: { + paper_id: "accurate-large-minibatch-sgd-training" + title: "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour" + arxiv_id: "1706.02677" + abstract: "Deep learning thrives with large neural networks and large datasets. However,\nlarger networks and larger datasets result in longer training times that impede\nresearch and development progress. Distributed synchronous SGD offers a\npotential solution to this problem by dividing SGD minibatches over a pool of\nparallel workers. Yet to make this scheme efficient, the per-worker workload\nmust be large, which implies nontrivial growth in the SGD minibatch size. In\nthis paper, we empirically show that on the ImageNet dataset large minibatches\ncause optimization difficulties, but when these are addressed the trained\nnetworks exhibit good generalization. Specifically, we show no loss of accuracy\nwhen training with large minibatch sizes up to 8192 images. To achieve this\nresult, we adopt a hyper-parameter-free linear scaling rule for adjusting\nlearning rates as a function of minibatch size and develop a new warmup scheme\nthat overcomes optimization challenges early in training. With these simple\ntechniques, our Caffe2-based system trains ResNet-50 with a minibatch size of\n8192 on 256 GPUs in one hour, while matching small minibatch accuracy. Using\ncommodity hardware, our implementation achieves ~90% scaling efficiency when\nmoving from 8 to 256 GPUs. Our findings enable training visual recognition\nmodels on internet-scale data with high efficiency." + pub_date: { + seconds: 1496880000 + } + authors: "Priya Goyal" + authors: "Piotr Dollár" + authors: "Ross Girshick" + authors: "Pieter Noordhuis" + authors: "Lukasz Wesolowski" + authors: "Aapo Kyrola" + authors: "Andrew Tulloch" + authors: "Yangqing Jia" + authors: "Kaiming He" + repositories: { + url: "https://github.com/luminxu/ViPNAS" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "The official repo for CVPR2021——ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search." 
+ } + repositories: { + url: "https://github.com/nerminsamet/HPRNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 19 + } + repositories: { + url: "https://github.com/IVRL/FG-NIC" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "Fidelity-Guided Noisy Image Classification" + } + repositories: { + url: "https://github.com/vycezhong/byteps-compress" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/YeLyuUT/VOSDetectron" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "Combination of Mask RCNN with ConvGRU for video object segmentation" + } + repositories: { + url: "https://github.com/MarcAntoineAlex/darts" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/kikacaty/adv_guide" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/HRNet/Lite-HRNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 340 + description: "This is an official pytorch implementation of Lite-HRNet: A Lightweight High-Resolution Network. " + } + repositories: { + url: "https://github.com/serend1p1ty/SeqNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 113 + description: "Code for AAAI 2021 paper: Sequential End-to-end Network for Efficient Person Search" + } + repositories: { + url: "https://github.com/ericyang789/Parallel-Compute-Project" + framework: FRAMEWORK_OTHERS + description: "C implementation of t-SNE with parallelization optimization" + } + methods: { + name: "SGD" + full_name: "Stochastic Gradient Descent" + description: "**Stochastic Gradient Descent** is an iterative optimization technique that uses minibatches of data to form an expectation of the gradient, rather than the full gradient using all available data. That is for weights $w$ and a loss function $L$ we have:\r\n\r\n$$ w\\_{t+1} = w\\_{t} - \\eta\\hat{\\nabla}\\_{w}{L(w\\_{t})} $$\r\n\r\nWhere $\\eta$ is a learning rate. SGD reduces redundancy compared to batch gradient descent - which recomputes gradients for similar examples before each parameter update - so it is usually much faster.\r\n\r\n(Image Source: [here](http://rasbt.github.io/mlxtend/user_guide/general_concepts/gradient-optimization/))" + } + } + video: { + video_id: "g3McZgloCJo" + video_title: "PR-133: Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour" + number_of_likes: 9 + number_of_views: 652 + published_date: { + seconds: 1547454308 + } + uploader: "Young Seok Kim" + } + } +} +pr_id_to_video: { + key: 134 + value: { + pr_id: 134 + papers: { + paper_id: "how-does-batch-normalization-help" + title: "How Does Batch Normalization Help Optimization?" + arxiv_id: "1805.11604" + abstract: "Batch Normalization (BatchNorm) is a widely adopted technique that enables\nfaster and more stable training of deep neural networks (DNNs). Despite its\npervasiveness, the exact reasons for BatchNorm's effectiveness are still poorly\nunderstood. The popular belief is that this effectiveness stems from\ncontrolling the change of the layers' input distributions during training to\nreduce the so-called \"internal covariate shift\". In this work, we demonstrate\nthat such distributional stability of layer inputs has little to do with the\nsuccess of BatchNorm. Instead, we uncover a more fundamental impact of\nBatchNorm on the training process: it makes the optimization landscape\nsignificantly smoother. 
This smoothness induces a more predictive and stable\nbehavior of the gradients, allowing for faster training." + pub_date: { + seconds: 1527552000 + } + authors: "Shibani Santurkar" + authors: "Dimitris Tsipras" + authors: "Andrew Ilyas" + authors: "Aleksander Madry" + repositories: { + url: "https://github.com/yaoshiang/MobileNetV2-CIFAR-Cleverhans" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/AchintyaX/Brain_tumor_segmentation" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/utsawk/CarND-Traffic-Sign-Classifier-Project" + framework: FRAMEWORK_OTHERS + description: "Udacity CarND Traffic Sign Classifier Project" + } + repositories: { + url: "https://github.com/jadevaibhav/Brain-Tumor-Segmentation-using-Deep-Neural-networks" + framework: FRAMEWORK_OTHERS + number_of_stars: 79 + description: "Keras implementation of paper by the same name" + } + repositories: { + url: "https://github.com/peteraugustine/seg3" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/ajinas-ibrahim/brain_tumor" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/tobinthankachan1/exam1" + framework: FRAMEWORK_OTHERS + } + } + video: { + video_id: "hiN0IMM50FM" + video_title: "PR-134 How Does Batch Normalization Help Optimization?" + number_of_likes: 14 + number_of_views: 1001 + published_date: { + seconds: 1548117640 + } + uploader: "visionNoob" + } + } +} +pr_id_to_video: { + key: 135 + value: { + pr_id: 135 + papers: { + paper_id: "photo-wake-up-3d-character-animation-from-a" + title: "Photo Wake-Up: 3D Character Animation from a Single Photo" + arxiv_id: "1812.02246" + abstract: "We present a method and application for animating a human subject from a\nsingle photo. E.g., the character can walk out, run, sit, or jump in 3D. The\nkey contributions of this paper are: 1) an application of viewing and animating\nhumans in single photos in 3D, 2) a novel 2D warping method to deform a posable\ntemplate body model to fit the person's complex silhouette to create an\nanimatable mesh, and 3) a method for handling partial self occlusions. We\ncompare to state-of-the-art related methods and evaluate results with human\nstudies. Further, we present an interactive interface that allows re-posing the\nperson in 3D, and an augmented reality setup where the animated 3D person can\nemerge from the photo into the real world. We demonstrate the method on photos,\nposters, and art." + pub_date: { + seconds: 1543968000 + } + authors: "Chung-Yi Weng" + authors: "Brian Curless" + authors: "Ira Kemelmacher-Shlizerman" + } + video: { + video_id: "LSlBoNNbULg" + video_title: "PR-135: Photo Wake-Up: 3D Character Animation from a Single Photo" + number_of_likes: 55 + number_of_views: 2976 + published_date: { + seconds: 1548003936 + } + uploader: "Soyeon Kim" + } + } +} +pr_id_to_video: { + key: 136 + value: { + pr_id: 136 + papers: { + paper_id: "self-supervised-generative-adversarial" + title: "Self-Supervised GANs via Auxiliary Rotation Loss" + arxiv_id: "1811.11212" + abstract: "Conditional GANs are at the forefront of natural image synthesis. The main\ndrawback of such models is the necessity for labeled data. In this work we\nexploit two popular unsupervised learning techniques, adversarial training and\nself-supervision, and take a step towards bridging the gap between conditional\nand unconditional GANs. 
In particular, we allow the networks to collaborate on\nthe task of representation learning, while being adversarial with respect to\nthe classic GAN game. The role of self-supervision is to encourage the\ndiscriminator to learn meaningful feature representations which are not\nforgotten during training. We test empirically both the quality of the learned\nimage representations, and the quality of the synthesized images. Under the\nsame conditions, the self-supervised GAN attains a similar performance to\nstate-of-the-art conditional counterparts. Finally, we show that this approach\nto fully unsupervised learning can be scaled to attain an FID of 23.4 on\nunconditional ImageNet generation." + pub_date: { + seconds: 1543276800 + } + authors: "Ting Chen" + authors: "Xiaohua Zhai" + authors: "Marvin Ritter" + authors: "Mario Lucic" + authors: "Neil Houlsby" + repositories: { + is_official: true + url: "https://github.com/google/compare_gan" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1731 + description: "Compare GAN code." + } + repositories: { + url: "https://github.com/zhangqianhui/Self-Supervised-GANs" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 30 + description: "Tensorflow Implementation for paper \"self-supervised generative adversarial networks\"" + } + repositories: { + url: "https://github.com/vandit15/Self-Supervised-Gans-Pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 50 + description: "Ready to train Pytorch implementation of the CVPR'19 paper \"Self-Supervised GANs via Auxiliary Rotation Loss\"" + } + methods: { + name: "GAN" + full_name: "Generative Adversarial Network" + description: "A **GAN**, or **Generative Adversarial Network**, is a generative model that simultaneously trains\r\ntwo models: a generative model $G$ that captures the data distribution, and a discriminative model $D$ that estimates the\r\nprobability that a sample came from the training data rather than $G$.\r\n\r\nThe training procedure for $G$ is to maximize the probability of $D$ making\r\na mistake. This framework corresponds to a minimax two-player game. In the\r\nspace of arbitrary functions $G$ and $D$, a unique solution exists, with $G$\r\nrecovering the training data distribution and $D$ equal to $\\frac{1}{2}$\r\neverywhere. In the case where $G$ and $D$ are defined by multilayer perceptrons,\r\nthe entire system can be trained with backpropagation. 
\r\n\r\n(Image Source: [here](http://www.kdnuggets.com/2017/01/generative-adversarial-networks-hot-topic-machine-learning.html))" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + } + video: { + video_id: "_wpDP-6afM4" + video_title: "PR-136 Self-Supervised Generative Adversarial Networks" + number_of_likes: 23 + number_of_views: 1085 + published_date: { + seconds: 1547995361 + } + uploader: "강민국" + } + } +} +pr_id_to_video: { + key: 137 + value: { + pr_id: 137 + papers: { + paper_id: "mine-mutual-information-neural-estimation" + title: "MINE: Mutual Information Neural Estimation" + arxiv_id: "1801.04062" + abstract: "We argue that the estimation of mutual information between high dimensional\ncontinuous random variables can be achieved by gradient descent over neural\nnetworks. We present a Mutual Information Neural Estimator (MINE) that is\nlinearly scalable in dimensionality as well as in sample size, trainable\nthrough back-prop, and strongly consistent. We present a handful of\napplications on which MINE can be used to minimize or maximize mutual\ninformation. We apply MINE to improve adversarially trained generative models.\nWe also use MINE to implement Information Bottleneck, applying it to supervised\nclassification; our results demonstrate substantial improvement in flexibility\nand performance in these settings." + pub_date: { + seconds: 1515715200 + } + authors: "Mohamed Ishmael Belghazi" + authors: "Aristide Baratin" + authors: "Sai Rajeswar" + authors: "Sherjil Ozair" + authors: "Yoshua Bengio" + authors: "Aaron Courville" + authors: "R Devon Hjelm" + repositories: { + url: "https://github.com/ahujak/KKLE" + framework: FRAMEWORK_OTHERS + description: "Estimating KL Divergence" + } + repositories: { + url: "https://github.com/sambklein/MINE_demo" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/gtegner/hyper-gan" + framework: FRAMEWORK_PYTORCH + description: "Uncertainty Estimation with HyperGANS in PyTorch!" + } + repositories: { + url: "https://github.com/MasanoriYamada/Mine_pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 156 + description: "MINE: Mutual Information Neural Estimation in pytorch (unofficial)" + } + repositories: { + url: "https://github.com/mzgubic/MINE" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 37 + description: "Mutual Information Neural Estimator implemented in Tensorflow" + } + repositories: { + url: "https://github.com/csliuwei/Emotion_MI" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/Avi-avidan/MINE" + framework: FRAMEWORK_PYTORCH + description: "Multi Information Neural Encoder " + } + repositories: { + url: "https://github.com/dizcza/EmbedderSDR" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "Embedder with binary sparse distributed representation." 
+ } + repositories: { + url: "https://github.com/shannonycj/simple-mine" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "A tensorflow implementation of Mutual Information Nerual Estimation" + } + repositories: { + url: "https://github.com/ChengzhangZhu/MINE" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 14 + description: "Keras implementation (only for tensorflow backend) of MINE: Mutual Information Neural Estimation" + } + } + video: {} + } +} +pr_id_to_video: { + key: 138 + value: { + pr_id: 138 + papers: { + paper_id: "overcoming-limitations-of-mixture-density-1" + title: "Overcoming Limitations of Mixture Density Networks: A Sampling and Fitting Framework for Multimodal Future Prediction" + arxiv_id: "1906.03631" + abstract: "Future prediction is a fundamental principle of intelligence that helps plan actions and avoid possible dangers. As the future is uncertain to a large extent, modeling the uncertainty and multimodality of the future states is of great relevance. Existing approaches are rather limited in this regard and mostly yield a single hypothesis of the future or, at the best, strongly constrained mixture components that suffer from instabilities in training and mode collapse. In this work, we present an approach that involves the prediction of several samples of the future with a winner-takes-all loss and iterative grouping of samples to multiple modes. Moreover, we discuss how to evaluate predicted multimodal distributions, including the common real scenario, where only a single sample from the ground-truth distribution is available for evaluation. We show on synthetic and real data that the proposed approach triggers good estimates of multimodal distributions and avoids mode collapse. Source code is available at $\\href{https://github.com/lmb-freiburg/Multimodal-Future-Prediction}{\\text{this https URL.}}$" + pub_date: { + seconds: 1560038400 + } + authors: "Osama Makansi" + authors: "Eddy Ilg" + authors: "Özgün Cicek" + authors: "Thomas Brox" + repositories: { + is_official: true + url: "https://github.com/lmb-freiburg/Multimodal-Future-Prediction" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 27 + description: "The official repository for the CVPR 2019 paper \"Overcoming Limitations of Mixture Density Networks: A Sampling and Fitting Framework for Multimodal Future Prediction\"" + } + } + video: { + video_id: "VORJQQUphuw" + video_title: "PR-138: Mixture Density Network" + number_of_views: 2390 + published_date: { + seconds: 1548599784 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 139 + value: { + pr_id: 139 + papers: { + paper_id: "fully-convolutional-siamese-networks-for-1" + title: "Fully-Convolutional Siamese Networks for Object Tracking" + arxiv_id: "1606.09549" + abstract: "The problem of arbitrary object tracking has traditionally been tackled by\nlearning a model of the object's appearance exclusively online, using as sole\ntraining data the video itself. Despite the success of these methods, their\nonline-only approach inherently limits the richness of the model they can\nlearn. Recently, several attempts have been made to exploit the expressive\npower of deep convolutional networks. However, when the object to track is not\nknown beforehand, it is necessary to perform Stochastic Gradient Descent online\nto adapt the weights of the network, severely compromising the speed of the\nsystem. 
In this paper we equip a basic tracking algorithm with a novel\nfully-convolutional Siamese network trained end-to-end on the ILSVRC15 dataset\nfor object detection in video. Our tracker operates at frame-rates beyond\nreal-time and, despite its extreme simplicity, achieves state-of-the-art\nperformance in multiple benchmarks." + pub_date: { + seconds: 1467244800 + } + authors: "Luca Bertinetto" + authors: "Jack Valmadre" + authors: "João F. Henriques" + authors: "Andrea Vedaldi" + authors: "Philip H. S. Torr" + repositories: { + url: "https://github.com/logiklesuraj/siamfcex" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/logiklesuraj/SiamFC" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/suraj-maniyar/Object-Tracking-SSD300" + framework: FRAMEWORK_PYTORCH + description: "Object tracking using SSD" + } + repositories: { + url: "https://github.com/zllrunning/SiameseX.PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 340 + description: "A simplified PyTorch implementation of Siamese networks for tracking: SiamFC, SiamRPN, SiamRPN++, SiamVGG, SiamDW, SiamRPN-VGG." + } + repositories: { + url: "https://github.com/shallowtoil/DROL" + framework: FRAMEWORK_PYTORCH + number_of_stars: 58 + description: "Discriminative and Robust Online Learning for Siamese Visual Tracking [AAAI2020]" + } + methods: { + name: "Siamese Network" + full_name: "Siamese Network" + description: "A **Siamese Network** consists of twin networks which accept distinct inputs but are joined by an energy function at the top. This function computes a metric between the highest level feature representation on each side. The parameters between the twin networks are tied. Weight tying guarantees that two extremely similar images are not mapped by each network to very different locations in feature space because each network computes the same function. The network is symmetric, so that whenever we present two distinct images to the twin networks, the top conjoining layer will compute the same metric as if we were to we present the same two images but to the opposite twins.\r\n\r\nIntuitively instead of trying to classify inputs, a siamese network learns to differentiate between inputs, learning their similarity. The loss function used is usually a form of contrastive loss.\r\n\r\nSource: [Koch et al](https://www.cs.cmu.edu/~rsalakhu/papers/oneshot1.pdf)" + } + } + video: { + video_id: "dv5yUl6Lw1g" + video_title: "PR-139: Fully Convolutional Siamese Networks for Object Tracking" + number_of_likes: 38 + number_of_views: 2850 + published_date: { + seconds: 1549845265 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 140 + value: { + pr_id: 140 + papers: { + paper_id: "training-set-debugging-using-trusted-items" + title: "Training Set Debugging Using Trusted Items" + arxiv_id: "1801.08019" + abstract: "Training set bugs are flaws in the data that adversely affect machine\nlearning. The training set is usually too large for man- ual inspection, but\none may have the resources to verify a few trusted items. The set of trusted\nitems may not by itself be adequate for learning, so we propose an algorithm\nthat uses these items to identify bugs in the training set and thus im- proves\nlearning. Specifically, our approach seeks the smallest set of changes to the\ntraining set labels such that the model learned from this corrected training\nset predicts labels of the trusted items correctly. 
We flag the items whose\nlabels are changed as potential bugs, whose labels can be checked for veracity\nby human experts. To find the bugs in this way is a challenging combinatorial\nbilevel optimization problem, but it can be relaxed into a continuous\noptimization problem. Ex- periments on toy and real data demonstrate that our\napproach can identify training set bugs effectively and suggest appro- priate\nchanges to the labels. Our algorithm is a step toward trustworthy machine\nlearning." + pub_date: { + seconds: 1516752000 + } + authors: "Xuezhou Zhang" + authors: "Xiaojin Zhu" + authors: "Stephen J. Wright" + } + video: { + video_id: "_2l2UFIF08Q" + video_title: "PR-140: Training Set Debugging Using Trusted Items" + number_of_likes: 5 + number_of_views: 613 + published_date: { + seconds: 1549810486 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 141 + value: { + pr_id: 141 + papers: { + paper_id: "auto-deeplab-hierarchical-neural-architecture" + title: "Auto-DeepLab: Hierarchical Neural Architecture Search for Semantic Image Segmentation" + arxiv_id: "1901.02985" + abstract: "Recently, Neural Architecture Search (NAS) has successfully identified neural\nnetwork architectures that exceed human designed ones on large-scale image\nclassification. In this paper, we study NAS for semantic image segmentation.\nExisting works often focus on searching the repeatable cell structure, while\nhand-designing the outer network structure that controls the spatial resolution\nchanges. This choice simplifies the search space, but becomes increasingly\nproblematic for dense image prediction which exhibits a lot more network level\narchitectural variations. Therefore, we propose to search the network level\nstructure in addition to the cell level structure, which forms a hierarchical\narchitecture search space. We present a network level search space that\nincludes many popular designs, and develop a formulation that allows efficient\ngradient-based architecture search (3 P100 GPU days on Cityscapes images). We\ndemonstrate the effectiveness of the proposed method on the challenging\nCityscapes, PASCAL VOC 2012, and ADE20K datasets. Auto-DeepLab, our\narchitecture searched specifically for semantic image segmentation, attains\nstate-of-the-art performance without any ImageNet pretraining." 
+ pub_date: { + seconds: 1547078400 + } + authors: "Chenxi Liu" + authors: "Liang-Chieh Chen" + authors: "Florian Schroff" + authors: "Hartwig Adam" + authors: "Wei Hua" + authors: "Alan Yuille" + authors: "Li Fei-Fei" + repositories: { + is_official: true + url: "https://github.com/tensorflow/models" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70339 + description: "Models and examples built with TensorFlow" + } + repositories: { + url: "https://github.com/Dawars/auto_deeplab-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 29 + description: "[wip] Implementation of found architecture in Auto Deeplab" + } + repositories: { + url: "https://github.com/MenghaoGuo/AutoDeeplab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 380 + description: "Pytorch Implementation the paper Auto-DeepLab Hierarchical Neural Architecture Search for Semantic Image Segmentation" + } + repositories: { + url: "https://github.com/NoamRosenberg/autodeeplab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 270 + description: "AutoDeeplab / auto-deeplab / AutoML for semantic segmentation, implemented in Pytorch" + } + methods: { + name: "Tanh Activation" + full_name: "Tanh Activation" + description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "LSTM" + full_name: "Long Short-Term Memory" + description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." 
+ } + } + video: { + video_id: "ltlhQXHGzgE" + video_title: "PR-141: Auto-DeepLab: Hierarchical Neural Architecture Search for Semantic Image Segmentation" + number_of_likes: 19 + number_of_views: 1694 + published_date: { + seconds: 1550413961 + } + uploader: "Taekmin Kim" + } + } +} +pr_id_to_video: { + key: 142 + value: { + pr_id: 142 + papers: { + paper_id: "wasserstein-gan" + title: "Wasserstein GAN" + arxiv_id: "1701.07875" + abstract: "We introduce a new algorithm named WGAN, an alternative to traditional GAN\ntraining. In this new model, we show that we can improve the stability of\nlearning, get rid of problems like mode collapse, and provide meaningful\nlearning curves useful for debugging and hyperparameter searches. Furthermore,\nwe show that the corresponding optimization problem is sound, and provide\nextensive theoretical work highlighting the deep connections to other distances\nbetween distributions." + pub_date: { + seconds: 1485388800 + } + authors: "Martin Arjovsky" + authors: "Soumith Chintala" + authors: "Léon Bottou" + repositories: { + url: "https://github.com/bhargavajs07/Packed-Wasserstein-GAN-with-GradientPenalty-Example" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/VitoRazor/Gan_Architecture" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/sanghyun-son/EDSR-PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1656 + description: "PyTorch version of the paper 'Enhanced Deep Residual Networks for Single Image Super-Resolution' (CVPRW 2017) " + } + repositories: { + url: "https://github.com/shekkizh/WassersteinGAN.tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 419 + description: "Tensorflow implementation of Wasserstein GAN - arxiv: https://arxiv.org/abs/1701.07875" + } + repositories: { + url: "https://github.com/lab-ml/annotated_deep_learning_paper_implementations/tree/master/labml_nn/gan/wasserstein" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3068 + description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + } + repositories: { + url: "https://github.com/Ars235/Novelty_Detection" + framework: FRAMEWORK_PYTORCH + description: "PyTorch implementation of Adversarially Learned One-Class Classifier for Novelty Detection" + } + repositories: { + url: "https://github.com/ChristophReich1996/Mode_Collapse" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "Mode collapse example of GANs in 2D (PyTorch)." 
+ } + repositories: { + url: "https://github.com/rkem1542/EDSR-pytorch" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/kynk94/TF2-Image-Generation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "Tensorflow2 reimplementation of image generation model (GAN, Style Transfer, Image to Image Translation, etc)" + } + repositories: { + url: "https://github.com/laowng/GISR" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "From EDSR" + } + methods: { + name: "WGAN" + full_name: "Wasserstein GAN" + description: "**Wasserstein GAN**, or **WGAN**, is a type of generative adversarial network that minimizes an approximation of the Earth-Mover's distance (EM) rather than the Jensen-Shannon divergence as in the original GAN formulation. It leads to more stable training than original GANs with less evidence of mode collapse, as well as meaningful curves that can be used for debugging and searching hyperparameters." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + } + video: { + video_id: "tKQwlf-DAl0" + video_title: "PR-142: Wasserstein GAN" + number_of_likes: 34 + number_of_views: 1982 + published_date: { + seconds: 1550412193 + } + uploader: "Jinsung Yoon" + } + } +} +pr_id_to_video: { + key: 143 + value: { + pr_id: 143 + papers: { + paper_id: "recurrent-world-models-facilitate-policy" + title: "Recurrent World Models Facilitate Policy Evolution" + arxiv_id: "1809.01999" + abstract: "A generative recurrent neural network is quickly trained in an unsupervised\nmanner to model popular reinforcement learning environments through compressed\nspatio-temporal representations. The world model's extracted features are fed\ninto compact and simple policies trained by evolution, achieving state of the\nart results in various environments. We also train our agent entirely inside of\nan environment generated by its own internal world model, and transfer this\npolicy back into the actual environment. Interactive version of paper at\nhttps://worldmodels.github.io" + pub_date: { + seconds: 1536019200 + } + authors: "David Ha" + authors: "Jürgen Schmidhuber" + } + video: { + video_id: "APjGjwBR6o8" + video_title: "PR-143: Recurrent World Models Facilitate Policy Evolution" + number_of_likes: 8 + number_of_views: 550 + published_date: { + seconds: 1551026446 + } + uploader: "정지원" + } + } +} +pr_id_to_video: { + key: 144 + value: { + pr_id: 144 + papers: { + paper_id: "squeezenext-hardware-aware-neural-network" + title: "SqueezeNext: Hardware-Aware Neural Network Design" + arxiv_id: "1803.10615" + abstract: "One of the main barriers for deploying neural networks on embedded systems\nhas been large memory and power consumption of existing neural networks. 
In\nthis work, we introduce SqueezeNext, a new family of neural network\narchitectures whose design was guided by considering previous architectures\nsuch as SqueezeNet, as well as by simulation results on a neural network\naccelerator. This new network is able to match AlexNet's accuracy on the\nImageNet benchmark with $112\\times$ fewer parameters, and one of its deeper\nvariants is able to achieve VGG-19 accuracy with only 4.4 Million parameters,\n($31\\times$ smaller than VGG-19). SqueezeNext also achieves better top-5\nclassification accuracy with $1.3\\times$ fewer parameters as compared to\nMobileNet, but avoids using depthwise-separable convolutions that are\ninefficient on some mobile processor platforms. This wide range of accuracy\ngives the user the ability to make speed-accuracy tradeoffs, depending on the\navailable resources on the target hardware. Using hardware simulation results\nfor power and inference speed on an embedded system has guided us to design\nvariations of the baseline model that are $2.59\\times$/$8.26\\times$ faster and\n$2.25\\times$/$7.5\\times$ more energy efficient as compared to\nSqueezeNet/AlexNet without any accuracy degradation." + pub_date: { + seconds: 1521763200 + } + authors: "Amir Gholami" + authors: "Kiseok Kwon" + authors: "Bichen Wu" + authors: "Zizheng Tai" + authors: "Xiangyu Yue" + authors: "Peter Jin" + authors: "Sicheng Zhao" + authors: "Kurt Keutzer" + repositories: { + url: "https://github.com/osmr/imgclsmob" + framework: FRAMEWORK_OTHERS + number_of_stars: 2202 + description: "Sandbox for training deep learning networks" + } + repositories: { + url: "https://github.com/luuuyi/SqueezeNext.PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 25 + description: "re-implement for paper: SqueezeNext: Hardware-Aware Neural Network Design. (SqueezeNext)" + } + repositories: { + url: "https://github.com/Timen/squeezenext-tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 54 + description: "A tensorflow implementation of squeezenext. (includes link to trained model)" + } + repositories: { + url: "https://github.com/x5675602/SqeezeNet" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/amirgholami/SqueezeNext" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 105 + } + methods: { + name: "Spatially Separable Convolution" + full_name: "Spatially Separable Convolution" + description: "A **Spatially Separable Convolution** decomposes a convolution into two separate operations. In regular convolution, if we have a 3 x 3 kernel then we directly convolve this with the image. We can divide a 3 x 3 kernel into a 3 x 1 kernel and a 1 x 3 kernel. Then, in spatially separable convolution, we first convolve the 3 x 1 kernel then the 1 x 3 kernel. This requires 6 instead of 9 parameters compared to regular convolution, and so it is more parameter efficient (additionally less matrix multiplications are required).\r\n\r\nImage Source: [Kunlun Bai](https://towardsdatascience.com/a-comprehensive-introduction-to-different-types-of-convolutions-in-deep-learning-669281e58215)" + } + methods: { + name: "Fire Module" + full_name: "Fire Module" + description: "A **Fire Module** is a building block for convolutional neural networks, notably used as part of [SqueezeNet](https://paperswithcode.com/method/squeezenet). 
A Fire module is comprised of: a squeeze convolution layer (which has only 1x1 filters), feeding into an expand layer that has a mix of 1x1 and 3x3 convolution filters. We expose three tunable dimensions (hyperparameters) in a Fire module: $s\\_{1x1}$, $e\\_{1x1}$, and $e\\_{3x3}$. In a Fire module, $s\\_{1x1}$ is the number of filters in the squeeze layer (all 1x1), $e\\_{1x1}$ is the number of 1x1 filters in the expand layer, and $e\\_{3x3}$ is the number of 3x3 filters in the expand layer. When we use Fire modules we set $s\\_{1x1}$ to be less than ($e\\_{1x1}$ + $e\\_{3x3}$), so the squeeze layer helps to limit the number of input channels to the 3x3 filters." + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "SqueezeNet" + full_name: "SqueezeNet" + description: "**SqueezeNet** is a convolutional neural network that employs design strategies to reduce the number of parameters, notably with the use of fire modules that \"squeeze\" parameters using 1x1 convolutions." + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Xavier Initialization" + full_name: "Xavier Initialization" + description: "**Xavier Initialization**, or **Glorot Initialization**, is an initialization scheme for neural networks. Biases are initialized be 0 and the weights $W\\_{ij}$ at each layer are initialized as:\r\n\r\n$$ W\\_{ij} \\sim U\\left[-\\frac{1}{\\sqrt{n}}, \\frac{1}{\\sqrt{n}}\\right] $$\r\n\r\nWhere $U$ is a uniform distribution and $n$ is the size of the previous layer (number of columns in $W$)." + } + } + video: { + video_id: "WReWeADJ3Pw" + video_title: "PR-144: SqueezeNext: Hardware-Aware Neural Network Design" + number_of_likes: 33 + number_of_views: 2018 + published_date: { + seconds: 1551018415 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 145 + value: { + pr_id: 145 + papers: { + paper_id: "visualizing-attention-in-transformer-based" + title: "Visualizing Attention in Transformer-Based Language Representation Models" + arxiv_id: "1904.02679" + abstract: "We present an open-source tool for visualizing multi-head self-attention in\nTransformer-based language representation models. The tool extends earlier work\nby visualizing attention at three levels of granularity: the attention-head\nlevel, the model level, and the neuron level. We describe how each of these\nviews can help to interpret the model, and we demonstrate the tool on the BERT\nmodel and the OpenAI GPT-2 model. We also present three use cases for analyzing\nGPT-2: detecting model bias, identifying recurring patterns, and linking\nneurons to model behavior." 
+ pub_date: { + seconds: 1554336000 + } + authors: "Jesse Vig" + methods: { + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "Linear Warmup With Cosine Annealing" + full_name: "Linear Warmup With Cosine Annealing" + description: "**Linear Warmup With Cosine Annealing** is a learning rate schedule where we increase the learning rate linearly for $n$ updates and then anneal according to a cosine schedule afterwards." + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. 
$ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + methods: { + name: "BPE" + full_name: "Byte Pair Encoding" + description: "**Byte Pair Encoding**, or **BPE**, is a subword segmentation algorithm that encodes rare and unknown words as sequences of subword units. The intuition is that various word classes are translatable via smaller units than words, for instance names (via character copying or transliteration), compounds (via compositional translation), and cognates and loanwords (via phonological and morphological transformations).\r\n\r\n[Lei Mao](https://leimao.github.io/blog/Byte-Pair-Encoding/) has a detailed blog post that explains how this works." + } + methods: { + name: "Discriminative Fine-Tuning" + full_name: "Discriminative Fine-Tuning" + description: "**Discriminative Fine-Tuning** is a fine-tuning strategy that is used for ULMFiT type models. Instead of using the same learning rate for all layers of the model, discriminative fine-tuning allows us to tune each layer with different learning rates. For context, the regular stochastic gradient descent (SGD) update of a model’s parameters $\\theta$ at time step $t$ looks like the following (Ruder, 2016):\r\n\r\n$$ \\theta\\_{t} = \\theta\\_{t-1} − \\eta\\cdot\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n\r\nwhere $\\eta$ is the learning rate and $\\nabla\\_{\\theta}J\\left(\\theta\\right)$ is the gradient with regard to the model’s objective function. For discriminative fine-tuning, we split the parameters $\\theta$ into {$\\theta\\_{1}, \\ldots, \\theta\\_{L}$} where $\\theta\\_{l}$ contains the parameters of the model at the $l$-th layer and $L$ is the number of layers of the model. Similarly, we obtain {$\\eta\\_{1}, \\ldots, \\eta\\_{L}$} where $\\theta\\_{l}$ where $\\eta\\_{l}$ is the learning rate of the $l$-th layer. The SGD update with discriminative finetuning is then:\r\n\r\n$$ \\theta\\_{t}^{l} = \\theta\\_{t-1}^{l} - \\eta^{l}\\cdot\\nabla\\_{\\theta^{l}}J\\left(\\theta\\right) $$\r\n\r\nThe authors find that empirically it worked well to first choose the learning rate $\\eta^{L}$ of the last layer by fine-tuning only the last layer and using $\\eta^{l-1}=\\eta^{l}/2.6$ as the learning rate for lower layers." + } + methods: { + name: "Attention Dropout" + full_name: "Attention Dropout" + description: "**Attention Dropout** is a type of dropout used in attention-based architectures, where elements are randomly dropped out of the softmax in the attention equation. For example, for scaled-dot product attention, we would drop elements from the first term:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}\\left(\\frac{QK^{T}}{\\sqrt{d_k}}\\right)V $$" + } + methods: { + name: "GELU" + full_name: "Gaussian Error Linear Units" + description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. 
The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." + } + } + video: { + video_id: "f5zULULWUwM" + video_title: "PR-145: Language Models are Unsupervised Multitask Learners (OpenAI GPT-2)" + number_of_likes: 13 + number_of_views: 965 + published_date: { + seconds: 1552226192 + } + uploader: "Young Seok Kim" + } + } +} +pr_id_to_video: { + key: 146 + value: { + pr_id: 146 + papers: { + paper_id: "cornernet-detecting-objects-as-paired" + title: "CornerNet: Detecting Objects as Paired Keypoints" + arxiv_id: "1808.01244" + abstract: "We propose CornerNet, a new approach to object detection where we detect an\nobject bounding box as a pair of keypoints, the top-left corner and the\nbottom-right corner, using a single convolution neural network. By detecting\nobjects as paired keypoints, we eliminate the need for designing a set of\nanchor boxes commonly used in prior single-stage detectors. In addition to our\nnovel formulation, we introduce corner pooling, a new type of pooling layer\nthat helps the network better localize corners. Experiments show that CornerNet\nachieves a 42.2% AP on MS COCO, outperforming all existing one-stage detectors." + pub_date: { + seconds: 1533254400 + } + authors: "Hei Law" + authors: "Jia Deng" + repositories: { + url: "https://github.com/open-mmlab/mmdetection" + framework: FRAMEWORK_PYTORCH + number_of_stars: 15379 + description: "OpenMMLab Detection Toolbox and Benchmark" + } + repositories: { + url: "https://github.com/egeonat/MS-CornerNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "An extension of the CornerNet architecture for RGB+T image inputs" + } + repositories: { + is_official: true + url: "https://github.com/princeton-vl/CornerNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2214 + } + methods: { + name: "Hourglass Module" + full_name: "Hourglass Module" + description: "An **Hourglass Module** is an image block module used mainly for pose estimation tasks. The design of the hourglass is motivated by the need to capture information at every scale. While local evidence is essential for identifying features like faces and hands, a final pose estimate requires a coherent understanding of the full body. The person’s orientation, the arrangement of their limbs, and the relationships of adjacent joints are among the many cues that are best recognized at different scales in the image. 
The hourglass is a simple, minimal design that has the capacity to capture all of these features and bring them together to output pixel-wise predictions.\r\n\r\nThe network must have some mechanism to effectively process and consolidate features across scales. The Hourglass uses a single pipeline with skip layers to preserve spatial information at each resolution. The network reaches its lowest resolution at 4x4 pixels allowing smaller spatial filters to be applied that compare features across the entire space of the image.\r\n\r\nThe hourglass is set up as follows: Convolutional and max pooling layers are used to process features down to a very low resolution. At each max pooling step, the network branches off and applies more convolutions at the original pre-pooled resolution. After reaching the lowest resolution, the network begins the top-down sequence of upsampling and combination of features across scales. To bring together information across two adjacent resolutions, we do nearest neighbor upsampling of the lower resolution followed by an elementwise addition of the two sets of features. The topology of the hourglass is symmetric, so for every layer present on the way down there is a corresponding layer going up.\r\n\r\nAfter reaching the output resolution of the network, two consecutive rounds of 1x1 convolutions are applied to produce the final network predictions. The output of the network is a set of heatmaps where for a given heatmap the network predicts the probability of a joint’s presence at each and every pixel." + } + methods: { + name: "Stacked Hourglass Network" + full_name: "Stacked Hourglass Network" + description: "**Stacked Hourglass Networks** are a type of convolutional neural network for pose estimation. They are based on the successive steps of pooling and upsampling that are done to produce a final set of predictions." + } + methods: { + name: "ColorJitter" + full_name: "Color Jitter" + description: "**ColorJitter** is a type of image data augmentation where we randomly change the brightness, contrast and saturation of an image.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. 
It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "Random Resized Crop" + full_name: "Random Resized Crop" + description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + methods: { + name: "Random Horizontal Flip" + full_name: "Random Horizontal Flip" + description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + methods: { + name: "CornerNet" + full_name: "CornerNet" + description: "**CornerNet** is an object detection model that detects an object bounding box as a pair of keypoints, the top-left corner and the bottom-right corner, using a single convolution neural network. By detecting objects as paired keypoints, we eliminate the need for designing a set of anchor boxes commonly used in prior single-stage detectors. It also utilises corner pooling, a new type of pooling layer than helps the network better localize corners." + } + methods: { + name: "Corner Pooling" + full_name: "Corner Pooling" + description: "**Corner Pooling** is a pooling technique for object detection that seeks to better localize corners by encoding explicit prior knowledge. Suppose we want to determine if a pixel at location $\\left(i, j\\right)$ is a top-left corner. Let $f\\_{t}$ and $f\\_{l}$ be the feature maps that are the inputs to the top-left corner pooling layer, and let $f\\_{t\\_{ij}}$ and $f\\_{l\\_{ij}}$ be the vectors at location $\\left(i, j\\right)$ in $f\\_{t}$ and $f\\_{l}$ respectively. With $H \\times W$ feature maps, the corner pooling layer first max-pools all feature vectors between $\\left(i, j\\right)$ and $\\left(i, H\\right)$ in $f\\_{t}$ to a feature vector $t\\_{ij}$ , and max-pools all feature vectors between $\\left(i, j\\right)$ and $\\left(W, j\\right)$ in $f\\_{l}$ to a feature vector $l\\_{ij}$. Finally, it adds $t\\_{ij}$ and $l\\_{ij}$ together." + } + methods: { + name: "Non Maximum Suppression" + full_name: "Non Maximum Suppression" + description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criteria is usually discarding entities that are below a given probability bound. 
With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where a $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" + } + } + video: { + video_id: "6OYmOtivQY8" + video_title: "PR-146: CornerNet: Detecting Objects as Paired Keypoints" + number_of_likes: 24 + number_of_views: 1873 + published_date: { + seconds: 1570081370 + } + uploader: "visionNoob" + } + } +} +pr_id_to_video: { + key: 147 + value: { + pr_id: 147 + papers: { + paper_id: "learning-deep-structure-preserving-image-text" + title: "Learning Deep Structure-Preserving Image-Text Embeddings" + arxiv_id: "1511.06078" + abstract: "This paper proposes a method for learning joint embeddings of images and text\nusing a two-branch neural network with multiple layers of linear projections\nfollowed by nonlinearities. The network is trained using a large margin\nobjective that combines cross-view ranking constraints with within-view\nneighborhood structure preservation constraints inspired by metric learning\nliterature. Extensive experiments show that our approach gains significant\nimprovements in accuracy for image-to-text and text-to-image retrieval. Our\nmethod achieves new state-of-the-art results on the Flickr30K and MSCOCO\nimage-sentence datasets and shows promise on the new task of phrase\nlocalization on the Flickr30K Entities dataset." + pub_date: { + seconds: 1447891200 + } + authors: "Liwei Wang" + authors: "Yin Li" + authors: "Svetlana Lazebnik" + } + video: { + video_id: "7lyxexSjshc" + video_title: "PR-147: Learning Deep Structure-Preserving Image-Text Embeddings" + number_of_likes: 20 + number_of_views: 576 + published_date: { + seconds: 1552667121 + } + uploader: "Soyeon Kim" + } + } +} +pr_id_to_video: { + key: 148 + value: { + pr_id: 148 + papers: { + paper_id: "deep-anomaly-detection-using-geometric" + title: "Deep Anomaly Detection Using Geometric Transformations" + arxiv_id: "1805.10917" + abstract: "We consider the problem of anomaly detection in images, and present a new\ndetection technique. Given a sample of images, all known to belong to a\n\"normal\" class (e.g., dogs), we show how to train a deep neural model that can\ndetect out-of-distribution images (i.e., non-dog objects). The main idea behind\nour scheme is to train a multi-class model to discriminate between dozens of\ngeometric transformations applied on all the given images. The auxiliary\nexpertise learned by the model generates feature detectors that effectively\nidentify, at test time, anomalous images based on the softmax activation\nstatistics of the model when applied on transformed images. We present\nextensive experiments using the proposed detector, which indicate that our\nalgorithm improves state-of-the-art methods by a wide margin." + pub_date: { + seconds: 1527465600 + } + authors: "Izhak Golan" + authors: "Ran El-Yaniv" + repositories: { + url: "https://github.com/ninatu/anomaly_detection" + framework: FRAMEWORK_PYTORCH + number_of_stars: 12 + description: "This is the official implementation of \"Anomaly Detection with Deep Perceptual Autoencoders\". 
" + } + repositories: { + is_official: true + url: "https://github.com/izikgo/AnomalyDetectionTransformations" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 129 + description: "A simple and effective method for single-class classification of images" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + } + video: { + video_id: "TgJuUxtLO3s" + video_title: "PR-148 deep anomaly detection using geometric transformations" + number_of_likes: 25 + number_of_views: 1628 + published_date: { + seconds: 1552831505 + } + uploader: "MinGuk Kang" + } + } +} +pr_id_to_video: { + key: 149 + value: { + pr_id: 149 + papers: { + paper_id: "perceptual-losses-for-real-time-style" + title: "Perceptual Losses for Real-Time Style Transfer and Super-Resolution" + arxiv_id: "1603.08155" + abstract: "We consider image transformation problems, where an input image is\ntransformed into an output image. Recent methods for such problems typically\ntrain feed-forward convolutional neural networks using a \\emph{per-pixel} loss\nbetween the output and ground-truth images. Parallel work has shown that\nhigh-quality images can be generated by defining and optimizing\n\\emph{perceptual} loss functions based on high-level features extracted from\npretrained networks. We combine the benefits of both approaches, and propose\nthe use of perceptual loss functions for training feed-forward networks for\nimage transformation tasks. We show results on image style transfer, where a\nfeed-forward network is trained to solve the optimization problem proposed by\nGatys et al in real-time. Compared to the optimization-based method, our\nnetwork gives similar qualitative results but is three orders of magnitude\nfaster. We also experiment with single-image super-resolution, where replacing\na per-pixel loss with a perceptual loss gives visually pleasing results." + pub_date: { + seconds: 1459036800 + } + authors: "Justin Johnson" + authors: "Alexandre Alahi" + authors: "Li Fei-Fei" + repositories: { + url: "https://github.com/jayChung0302/DeepFilter" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "make cool image filters for SNS applications" + } + repositories: { + url: "https://github.com/Josien94/MLiP" + framework: FRAMEWORK_TENSORFLOW + description: "This repository contains code and supplementary material for participated Kaggle Challenges." + } + repositories: { + url: "https://github.com/Arthur-ZHAO-001/Fast-style-transfer" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/milmor/perceptual-losses-neural-style" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "Perceptual Losses for Real-Time Style Transfer and Super-Resolution Tensorflow 2 implementation" + } + repositories: { + url: "https://github.com/vijishmadhavan/SkinDeep" + framework: FRAMEWORK_PYTORCH + number_of_stars: 703 + description: "Get Deinked!!" 
+ } + repositories: { + url: "https://github.com/back8/github_vijishmadhavan_ArtLine" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/kynk94/TF2-Image-Generation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "Tensorflow2 reimplementation of image generation model (GAN, Style Transfer, Image to Image Translation, etc)" + } + repositories: { + url: "https://github.com/rrrepsac/tb_vc" + framework: FRAMEWORK_PYTORCH + description: "telebot" + } + repositories: { + url: "https://github.com/vijishmadhavan/Toon-Me" + framework: FRAMEWORK_PYTORCH + number_of_stars: 315 + description: "A Deep Learning project to Toon Portrait Images" + } + repositories: { + url: "https://github.com/WalterJohnson0/tf-keras-implementation-of-Image-Style-transformation-network" + framework: FRAMEWORK_TENSORFLOW + description: "Computer Vision Final Project- implementation of Neural style transfer" + } + } + video: { + video_id: "OKDaGzeUz4U" + video_title: "PR-149: Perceptual Losses for Real-Time Style Transfer and Super-Resolution" + number_of_views: 2122 + published_date: { + seconds: 1552832996 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 150 + value: { + pr_id: 150 + papers: { + paper_id: "imagenet-trained-cnns-are-biased-towards" + title: "ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness" + arxiv_id: "1811.12231" + abstract: "Convolutional Neural Networks (CNNs) are commonly thought to recognise\nobjects by learning increasingly complex representations of object shapes. Some\nrecent studies suggest a more important role of image textures. We here put\nthese conflicting hypotheses to a quantitative test by evaluating CNNs and\nhuman observers on images with a texture-shape cue conflict. We show that\nImageNet-trained CNNs are strongly biased towards recognising textures rather\nthan shapes, which is in stark contrast to human behavioural evidence and\nreveals fundamentally different classification strategies. We then demonstrate\nthat the same standard architecture (ResNet-50) that learns a texture-based\nrepresentation on ImageNet is able to learn a shape-based representation\ninstead when trained on \"Stylized-ImageNet\", a stylized version of ImageNet.\nThis provides a much better fit for human behavioural performance in our\nwell-controlled psychophysical lab setting (nine experiments totalling 48,560\npsychophysical trials across 97 observers) and comes with a number of\nunexpected emergent benefits such as improved object detection performance and\npreviously unseen robustness towards a wide range of image distortions,\nhighlighting advantages of a shape-based representation." + pub_date: { + seconds: 1543449600 + } + authors: "Robert Geirhos" + authors: "Patricia Rubisch" + authors: "Claudio Michaelis" + authors: "Matthias Bethge" + authors: "Felix A. Wichmann" + authors: "Wieland Brendel" + repositories: { + url: "https://github.com/facebookresearch/augmentation-corruption" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + description: "This repository provides code for \"On Interaction Between Augmentations and Corruptions in Natural Corruption Robustness\"." 
+ } + repositories: { + url: "https://github.com/annstrange/breast-cancer-cnn" + framework: FRAMEWORK_TENSORFLOW + description: "Breast Cancer biopsy image analysis using CNN" + } + repositories: { + url: "https://github.com/LiYingwei/ShapeTextureDebiasedTraining" + framework: FRAMEWORK_PYTORCH + number_of_stars: 71 + description: "Code and models for the paper Shape-Texture Debiased Neural Network Training (ICLR 2021)" + } + repositories: { + is_official: true + url: "https://github.com/rgeirhos/texture-vs-shape" + framework: FRAMEWORK_PYTORCH + number_of_stars: 594 + description: "Pre-trained models, data, code & materials from the paper \"ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness\" (ICLR 2019 Oral)" + } + repositories: { + url: "https://github.com/mbuet2ner/local-global-features-cnn" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Code for my Master's Thesis: \"The Role of Local Versus Global Features in Convolutional Neural Networks\"" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Residual Block" + full_name: "Residual Block" + description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. 
The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. 
It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Bottleneck Residual Block" + full_name: "Bottleneck Residual Block" + description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + } + video: { + video_id: "oBapZTL8LsE" + video_title: "PR-150: ImageNet-trained CNNs are Biased Towards Textures" + number_of_likes: 17 + number_of_views: 1113 + published_date: { + seconds: 1553435404 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 151 + value: { + pr_id: 151 + papers: { + paper_id: "the-unreasonable-effectiveness-of-deep" + title: "The Unreasonable Effectiveness of Deep Features as a Perceptual Metric" + arxiv_id: "1801.03924" + abstract: "While it is nearly effortless for humans to quickly assess the perceptual\nsimilarity between two images, the underlying processes are thought to be quite\ncomplex. Despite this, the most widely used perceptual metrics today, such as\nPSNR and SSIM, are simple, shallow functions, and fail to account for many\nnuances of human perception. Recently, the deep learning community has found\nthat features of the VGG network trained on ImageNet classification has been\nremarkably useful as a training loss for image synthesis. But how perceptual\nare these so-called \"perceptual losses\"? 
What elements are critical for their\nsuccess? To answer these questions, we introduce a new dataset of human\nperceptual similarity judgments. We systematically evaluate deep features\nacross different architectures and tasks and compare them with classic metrics.\nWe find that deep features outperform all previous metrics by large margins on\nour dataset. More surprisingly, this result is not restricted to\nImageNet-trained VGG features, but holds across different deep architectures\nand levels of supervision (supervised, self-supervised, or even unsupervised).\nOur results suggest that perceptual similarity is an emergent property shared\nacross deep visual representations." + pub_date: { + seconds: 1515628800 + } + authors: "Richard Zhang" + authors: "Phillip Isola" + authors: "Alexei A. Efros" + authors: "Eli Shechtman" + authors: "Oliver Wang" + repositories: { + url: "https://github.com/tding1/CDFI" + framework: FRAMEWORK_PYTORCH + number_of_stars: 53 + description: "Code of paper \"CDFI: Compression-Driven Network Design for Frame Interpolation\", CVPR 2021" + } + repositories: { + url: "https://github.com/RudreshVeerkhare/StyleGan" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/cassava-math-ubb/experiments" + framework: FRAMEWORK_TENSORFLOW + description: "This repo contains our experimental approaches. " + } + repositories: { + url: "https://github.com/ak9250/stylegan-art" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 342 + description: "train stylegan through transfer learning" + } + repositories: { + url: "https://github.com/ayushgupta9198/stylegan" + framework: FRAMEWORK_TENSORFLOW + description: "The model is based for fake person creation based on stylegan technique. I have trained the model on my data set and generates the result basis of grids and seeds" + } + repositories: { + url: "https://github.com/isaacschaal/SG_training" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/stefkim/stylegan-batik" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/MrWednes/CopyNVlab" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/khurram702/StyleBasedGAN" + framework: FRAMEWORK_TENSORFLOW + description: "Style Base Architecture of Generator" + } + repositories: { + url: "https://github.com/ayushgupta9198/gan" + framework: FRAMEWORK_TENSORFLOW + description: "The model is based for fake person creation based on stylegan technique. I have trained the model on my data set and generates the result basis of grids and seeds" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. 
Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "VGG" + full_name: "VGG" + description: "**VGG** is a classical convolutional neural network architecture. It was based on an analysis of how to increase the depth of such networks. The network utilises small 3 x 3 filters. Otherwise the network is characterized by its simplicity: the only other components being pooling layers and a fully connected layer.\r\n\r\nImage: [Davi Frossard](https://www.cs.toronto.edu/frossard/post/vgg16/)" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + } + video: { + video_id: "VDeJFb5jt5M" + video_title: "PR-151: The Unreasonable Effectiveness of Deep Features as a Perceptual Metric" + number_of_likes: 6 + number_of_views: 716 + published_date: { + seconds: 1553438571 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 152 + value: { + pr_id: 152 + papers: { + paper_id: "stargan-unified-generative-adversarial" + title: "StarGAN: Unified Generative Adversarial Networks for Multi-Domain Image-to-Image Translation" + arxiv_id: "1711.09020" + abstract: "Recent studies have shown remarkable success in image-to-image translation\nfor two domains. However, existing approaches have limited scalability and\nrobustness in handling more than two domains, since different models should be\nbuilt independently for every pair of image domains. To address this\nlimitation, we propose StarGAN, a novel and scalable approach that can perform\nimage-to-image translations for multiple domains using only a single model.\nSuch a unified model architecture of StarGAN allows simultaneous training of\nmultiple datasets with different domains within a single network. This leads to\nStarGAN's superior quality of translated images compared to existing models as\nwell as the novel capability of flexibly translating an input image to any\ndesired target domain. We empirically demonstrate the effectiveness of our\napproach on a facial attribute transfer and a facial expression synthesis\ntasks." + pub_date: { + seconds: 1511481600 + } + authors: "Yunjey Choi" + authors: "Minje Choi" + authors: "Munyoung Kim" + authors: "Jung-Woo Ha" + authors: "Sunghun Kim" + authors: "Jaegul Choo" + repositories: { + url: "https://github.com/Kal213/StarGAN-Tutorial-Tensorflow-2.3" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 + description: "Intuitive StarGAN Code written in Tensorflow 2.3" + } + repositories: { + url: "https://github.com/MACderRu/StarGan_pytorch" + framework: FRAMEWORK_PYTORCH + description: "My implementation of StarGan paper" + } + repositories: { + url: "https://github.com/Masao-Taketani/StarGAN-tf2" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "TensorFlow 2 Implementation of \"StarGAN: Unified Generative Adversarial Networks for Multi-Domain Image-to-Image Translation\"." 
+ } + repositories: { + url: "https://github.com/hello-world-cc/starGANv1-Pytorch" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/shaominghe/stargan_adience" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/stevebong31/stargan" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/nguyen-nhat-anh/Star-GAN" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/shridhivyah/starGAN" + framework: FRAMEWORK_TENSORFLOW + description: "FaceAttributeChange_StarGAN" + } + repositories: { + url: "https://github.com/aditiasthana1004/StarGAN" + framework: FRAMEWORK_OTHERS + description: "StarGAN" + } + repositories: { + url: "https://github.com/MasterXiYu/stargan_mul" + framework: FRAMEWORK_PYTORCH + description: "face_for_stargan" + } + } + video: { + video_id: "i3-rTEFpyv0" + video_title: "PR-152:StarGAN: Unified Generative Adversarial Networks for Multi-Domain Image-to-Image Translation" + number_of_likes: 21 + number_of_views: 1592 + published_date: { + seconds: 1554040628 + } + uploader: "Jinsung Yoon" + } + } +} +pr_id_to_video: { + key: 153 + value: { + pr_id: 153 + papers: { + paper_id: "a-simple-neural-attentive-meta-learner" + title: "A Simple Neural Attentive Meta-Learner" + arxiv_id: "1707.03141" + abstract: "Deep neural networks excel in regimes with large amounts of data, but tend to\nstruggle when data is scarce or when they need to adapt quickly to changes in\nthe task. In response, recent work in meta-learning proposes training a\nmeta-learner on a distribution of similar tasks, in the hopes of generalization\nto novel but related tasks by learning a high-level strategy that captures the\nessence of the problem it is asked to solve. However, many recent meta-learning\napproaches are extensively hand-designed, either using architectures\nspecialized to a particular application, or hard-coding algorithmic components\nthat constrain how the meta-learner solves the task. We propose a class of\nsimple and generic meta-learner architectures that use a novel combination of\ntemporal convolutions and soft attention; the former to aggregate information\nfrom past experience and the latter to pinpoint specific pieces of information.\nIn the most extensive set of meta-learning experiments to date, we evaluate the\nresulting Simple Neural AttentIve Learner (or SNAIL) on several\nheavily-benchmarked tasks. On all tasks, in both supervised and reinforcement\nlearning, SNAIL attains state-of-the-art performance by significant margins." 
+ pub_date: { + seconds: 1499731200 + } + authors: "Nikhil Mishra" + authors: "Mostafa Rohaninejad" + authors: "Xi Chen" + authors: "Pieter Abbeel" + repositories: { + url: "https://github.com/seujung/SNAIL-gluon" + framework: FRAMEWORK_OTHERS + number_of_stars: 10 + description: "Implementation of SNAIL(A Simple Neural Attentive Meta-Learner) with Gluon" + } + repositories: { + url: "https://github.com/eambutu/snail-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 117 + description: "Implementation of \"A Simple Neural Attentive Meta-Learner\" (SNAIL, https://arxiv.org/pdf/1707.03141.pdf) in PyTorch" + } + repositories: { + url: "https://github.com/Michedev/snail" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "Pytorch implementation of SNAIL (Simple Neural Attentive Meta-Learner)" + } + methods: { + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." + } + methods: { + name: "Dilated Causal Convolution" + full_name: "Dilated Causal Convolution" + description: "A **Dilated Causal Convolution** is a causal convolution where the filter is applied over an area larger than its length by skipping input values with a certain step. A dilated causal convolution effectively allows the network to have very large receptive fields with just a few layers." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "SNAIL" + full_name: "Simple Neural Attention Meta-Learner" + description: "The **Simple Neural Attention Meta-Learner**, or **SNAIL**, combines the benefits of temporal convolutions and attention to solve meta-learning tasks. They introduce positional dependence through temporal convolutions to make the model applicable to reinforcement tasks - where the observations, actions, and rewards are intrinsically sequential. They also introduce attention in order to provide pinpoint access over an infinitely large context. SNAIL is constructed by combining the two: we use temporal convolutions to produce the context over which we use a causal attention operation." 
+ } + } + video: { + video_id: "zGrwpa5-_0Y" + video_title: "PR-153: SNAIL: A Simple Neural Attentive Meta-Learner" + number_of_likes: 11 + number_of_views: 923 + published_date: { + seconds: 1554043097 + } + uploader: "Taekmin Kim" + } + } +} +pr_id_to_video: { + key: 154 + value: { + pr_id: 154 + papers: { + paper_id: "semantic-image-synthesis-with-spatially" + title: "Semantic Image Synthesis with Spatially-Adaptive Normalization" + arxiv_id: "1903.07291" + abstract: "We propose spatially-adaptive normalization, a simple but effective layer for synthesizing photorealistic images given an input semantic layout. Previous methods directly feed the semantic layout as input to the deep network, which is then processed through stacks of convolution, normalization, and nonlinearity layers. We show that this is suboptimal as the normalization layers tend to ``wash away'' semantic information. To address the issue, we propose using the input layout for modulating the activations in normalization layers through a spatially-adaptive, learned transformation. Experiments on several challenging datasets demonstrate the advantage of the proposed method over existing approaches, regarding both visual fidelity and alignment with input layouts. Finally, our model allows user control over both semantic and style. Code is available at https://github.com/NVlabs/SPADE ." + pub_date: { + seconds: 1552867200 + } + authors: "Taesung Park" + authors: "Ming-Yu Liu" + authors: "Ting-Chun Wang" + authors: "Jun-Yan Zhu" + repositories: { + url: "https://github.com/KushajveerSingh/SPADE-PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 22 + description: "PyTorch unofficial implementation of Semantic Image Synthesis with Spatially-Adaptive Normalization paper by Nvidia Research" + } + repositories: { + url: "https://github.com/PacktPublishing/Hands-On-Image-Generation-with-TensorFlow-2.0/tree/master/Chapter06" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 62 + description: "Hands-On Image Generation with TensorFlow 2.0, published by Packt" + } + repositories: { + url: "https://github.com/GrahamRigby/GauGanPlus" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/AhmedAmraniAkdi/BudgetNvidiaGaugan" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/LoganOneal/neuralpaint-server" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/noyoshi/hacksc" + framework: FRAMEWORK_PYTORCH + number_of_stars: 195 + description: "🖌 photorealistic drawings from simple sketches using NVIDIA's GauGAN " + } + repositories: { + url: "https://github.com/noyoshi/smart-sketch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 195 + description: "🖌 photorealistic drawings from simple sketches using NVIDIA's GauGAN " + } + repositories: { + url: "https://github.com/taki0112/SPADE-Tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 312 + description: "Simple Tensorflow implementation of \"Semantic Image Synthesis with Spatially-Adaptive Normalization\" a.k.a. 
GauGAN, SPADE (CVPR 2019 Oral)" + } + repositories: { + url: "https://github.com/Dominioncher/smart-sketch" + framework: FRAMEWORK_PYTORCH + description: "NVidia netural network for sketches" + } + repositories: { + url: "https://github.com/manicman1999/StyleGAN-Keras" + framework: FRAMEWORK_OTHERS + number_of_stars: 160 + description: "StyleGAN made with Keras" + } + } + video: { + video_id: "1nJf35TSYtE" + video_title: "PR-154: Semantic Image Synthesis with Spatially-Adaptive Normalization" + number_of_likes: 19 + number_of_views: 1468 + published_date: { + seconds: 1554651283 + } + uploader: "정지원" + } + } +} +pr_id_to_video: { + key: 155 + value: { + pr_id: 155 + papers: { + paper_id: "exploring-randomly-wired-neural-networks-for" + title: "Exploring Randomly Wired Neural Networks for Image Recognition" + arxiv_id: "1904.01569" + abstract: "Neural networks for image recognition have evolved through extensive manual\ndesign from simple chain-like models to structures with multiple wiring paths.\nThe success of ResNets and DenseNets is due in large part to their innovative\nwiring plans. Now, neural architecture search (NAS) studies are exploring the\njoint optimization of wiring and operation types, however, the space of\npossible wirings is constrained and still driven by manual design despite being\nsearched. In this paper, we explore a more diverse set of connectivity patterns\nthrough the lens of randomly wired neural networks. To do this, we first define\nthe concept of a stochastic network generator that encapsulates the entire\nnetwork generation process. Encapsulation provides a unified view of NAS and\nrandomly wired networks. Then, we use three classical random graph models to\ngenerate randomly wired graphs for networks. The results are surprising:\nseveral variants of these random generators yield network instances that have\ncompetitive accuracy on the ImageNet benchmark. These results suggest that new\nefforts focusing on designing better network generators may lead to new\nbreakthroughs by exploring less constrained search spaces with more room for\nnovel design." 
+ pub_date: { + seconds: 1554163200 + } + authors: "Saining Xie" + authors: "Alexander Kirillov" + authors: "Ross Girshick" + authors: "Kaiming He" + repositories: { + url: "https://github.com/JihaoLee/Randomly_Wired_reproducibility" + framework: FRAMEWORK_PYTORCH + description: "This is a reimplementation of Exploring Randomly Wired Neural Networks for Image Recognition" + } + repositories: { + url: "https://github.com/wolszhang/randWireNN" + framework: FRAMEWORK_OTHERS + description: "compare different randomly wired neural network" + } + repositories: { + url: "https://github.com/swdsld/RandWire_tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 29 + description: "tensorflow implementation of Exploring Randomly Wired Neural Networks for Image Recognition" + } + repositories: { + url: "https://github.com/leaderj1001/RandWireNN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 77 + description: "Implementing Randomly Wired Neural Networks for Image Recognition, Using CIFAR-10 dataset, CIFAR-100 dataset" + } + repositories: { + url: "https://github.com/timctho/random-wired-nn-tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "Tensorflow implementation of \"Exploring Randomly Wired Neural Networks for Image Recognition\"" + } + repositories: { + url: "https://github.com/seungwonpark/RandWireNN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 676 + description: "Implementation of: \"Exploring Randomly Wired Neural Networks for Image Recognition\"" + } + repositories: { + url: "https://github.com/hebo1221/RandWireNN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 10 + description: "Unofficial Implementation of: \"Exploring Randomly Wired Neural Networks for Image Recognition\"" + } + repositories: { + url: "https://github.com/AbdouJaouhar/Exploring-Randomly-Wired-Neural-Networks-for-Image-Recognition" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Neural Architechture Search based on https://arxiv.org/abs/1904.01569" + } + repositories: { + url: "https://github.com/facebookresearch/pycls" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1597 + description: "Codebase for Image Classification Research, written in PyTorch." + } + repositories: { + url: "https://github.com/JiaminRen/RandWireNN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 270 + description: "Pytorch Implementation of: \"Exploring Randomly Wired Neural Networks for Image Recognition\"" + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function comprising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through the objective function. 
Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Random Resized Crop" + full_name: "Random Resized Crop" + description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." 
+ } + methods: { + name: "SGD with Momentum" + full_name: "SGD with Momentum" + description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Cosine Annealing" + full_name: "Cosine Annealing" + description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. 
The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\\eta\\_{t} = \\eta\\_{min}^{i} + \\frac{1}{2}\\left(\\eta\\_{max}^{i}-\\eta\\_{min}^{i}\\right)\\left(1+\\cos\\left(\\frac{T\\_{cur}}{T\\_{i}}\\pi\\right)\\right)\r\n$$\r\n\r\nWhere $\\eta\\_{min}^{i}$ and $\\eta\\_{max}^{i}$ are ranges for the learning rate, and $T\\_{cur}$ accounts for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" + } + methods: { + name: "Label Smoothing" + full_name: "Label Smoothing" + description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" + } + } + video: { + video_id: "qnGm1h365tc" + video_title: "PR-155: Exploring Randomly Wired Neural Networks for Image Recognition" + number_of_likes: 92 + number_of_views: 4138 + published_date: { + seconds: 1554649684 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 156 + value: { + pr_id: 156 + papers: { + paper_id: "channelnets-compact-and-efficient" + title: "ChannelNets: Compact and Efficient Convolutional Neural Networks via Channel-Wise Convolutions" + arxiv_id: "1809.01330" + abstract: "Convolutional neural networks (CNNs) have shown great capability of solving\nvarious artificial intelligence tasks. However, the increasing model size has\nraised challenges in employing them in resource-limited applications. In this\nwork, we propose to compress deep models by using channel-wise convolutions,\nwhich replace dense connections among feature maps with sparse ones in CNNs.\nBased on this novel operation, we build light-weight CNNs known as ChannelNets.\nChannelNets use three instances of channel-wise convolutions; namely group\nchannel-wise convolutions, depth-wise separable channel-wise convolutions, and\nthe convolutional classification layer. Compared to prior CNNs designed for\nmobile devices, ChannelNets achieve a significant reduction in terms of the\nnumber of parameters and computational cost without loss in accuracy. Notably,\nour work represents the first attempt to compress the fully-connected\nclassification layer, which usually accounts for about 25% of total parameters\nin compact CNNs. Experimental results on the ImageNet dataset demonstrate that\nChannelNets achieve consistently better performance compared to prior methods." 
+ pub_date: { + seconds: 1536105600 + } + authors: "Hongyang Gao" + authors: "Zhengyang Wang" + authors: "Shuiwang Ji" + repositories: { + url: "https://github.com/osmr/imgclsmob" + framework: FRAMEWORK_OTHERS + number_of_stars: 2202 + description: "Sandbox for training deep learning networks" + } + repositories: { + url: "https://github.com/HongyangGao/ChannelNets" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 77 + description: "Tensorflow Implementation of ChannelNets (NeurIPS 18)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + } + video: { + video_id: "oZbKWOBfNhk" + video_title: "PR-156: ChannelNets: Compact and Efficient CNN via Channel-Wise Convolutions" + number_of_likes: 1 + number_of_views: 222 + published_date: { + seconds: 1565744830 + } + uploader: "Young Seok Kim" + } + } +} +pr_id_to_video: { + key: 157 + value: { + pr_id: 157 + papers: { + paper_id: "fluid-annotation-a-human-machine" + title: "Fluid Annotation: A Human-Machine Collaboration Interface for Full Image Annotation" + arxiv_id: "1806.07527" + abstract: "We introduce Fluid Annotation, an intuitive human-machine collaboration\ninterface for annotating the class label and outline of every object and\nbackground region in an image. Fluid annotation is based on three principles:\n(I) Strong Machine-Learning aid. We start from the output of a strong neural\nnetwork model, which the annotator can edit by correcting the labels of\nexisting regions, adding new regions to cover missing objects, and removing\nincorrect regions. The edit operations are also assisted by the model. (II)\nFull image annotation in a single pass. As opposed to performing a series of\nsmall annotation tasks in isolation, we propose a unified interface for full\nimage annotation in a single pass. (III) Empower the annotator. We empower the\nannotator to choose what to annotate and in which order. This enables\nconcentrating on what the machine does not already know, i.e. putting human\neffort only on the errors it made. This helps using the annotation budget\neffectively. Through extensive experiments on the COCO+Stuff dataset, we\ndemonstrate that Fluid Annotation leads to accurate annotations very\nefficiently, taking three times less annotation time than the popular LabelMe\ninterface." + pub_date: { + seconds: 1529452800 + } + authors: "Mykhaylo Andriluka" + authors: "Jasper R. R. 
Uijlings" + authors: "Vittorio Ferrari" + } + video: { + video_id: "JbXdn44myP4" + video_title: "PR-157: Best of both worlds: human-machine collaboration for object annotation" + number_of_views: 349 + published_date: { + seconds: 1556532811 + } + uploader: "visionNoob" + } + } +} +pr_id_to_video: { + key: 158 + value: { + pr_id: 158 + papers: { + paper_id: "fots-fast-oriented-text-spotting-with-a" + title: "FOTS: Fast Oriented Text Spotting with a Unified Network" + arxiv_id: "1801.01671" + abstract: "Incidental scene text spotting is considered one of the most difficult and\nvaluable challenges in the document analysis community. Most existing methods\ntreat text detection and recognition as separate tasks. In this work, we\npropose a unified end-to-end trainable Fast Oriented Text Spotting (FOTS)\nnetwork for simultaneous detection and recognition, sharing computation and\nvisual information among the two complementary tasks. Specially, RoIRotate is\nintroduced to share convolutional features between detection and recognition.\nBenefiting from convolution sharing strategy, our FOTS has little computation\noverhead compared to baseline text detection network, and the joint training\nmethod learns more generic features to make our method perform better than\nthese two-stage methods. Experiments on ICDAR 2015, ICDAR 2017 MLT, and ICDAR\n2013 datasets demonstrate that the proposed method outperforms state-of-the-art\nmethods significantly, which further allows us to develop the first real-time\noriented text spotting system which surpasses all previous state-of-the-art\nresults by more than 5% on ICDAR 2015 text spotting task while keeping 22.6\nfps." + pub_date: { + seconds: 1515110400 + } + authors: "Xuebo Liu" + authors: "Ding Liang" + authors: "Shi Yan" + authors: "Dagui Chen" + authors: "Yu Qiao" + authors: "Junjie Yan" + repositories: { + url: "https://github.com/ArashJavan/FOTS" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/Kaushal28/FOTS-PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "PyTorch Implementation of Fast Oriented Text Spotting (FOTS)" + } + repositories: { + url: "https://github.com/xieyufei1993/FOTS" + framework: FRAMEWORK_PYTORCH + number_of_stars: 171 + description: "An Implementation of the FOTS: Fast Oriented Text Spotting with a Unified Network" + } + repositories: { + url: "https://github.com/Pay20Y/FOTS_TF" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 176 + description: "This an implementation of FOTS with tensorflow" + } + repositories: { + url: "https://github.com/yu20103983/FOTS" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 88 + description: "FOTS tensorflow implementation, Include train and test, EAST+Rotate+CRNN. FOTS: Fast Oriented Text Spotting with a Unified Network" + } + repositories: { + url: "https://github.com/jiangxiluning/FOTS.PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 534 + description: "FOTS Pytorch Implementation" + } + repositories: { + url: "https://github.com/Masao-Taketani/FOTS_OCR" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 41 + description: "TensorFlow Implementation of FOTS, Fast Oriented Text Spotting with a Unified Network." 
+ } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + } + video: { + video_id: "hOFViMbYnrs" + video_title: "PR-158: FOTS: Fast Oriented Text Spotting with a Unified Network" + number_of_likes: 25 + number_of_views: 992 + published_date: { + seconds: 1556529052 + } + uploader: "Soyeon Kim" + } + } +} +pr_id_to_video: { + key: 159 + value: { + pr_id: 159 + papers: { + paper_id: "synergistic-image-and-feature-adaptation" + title: "Synergistic Image and Feature Adaptation: Towards Cross-Modality Domain Adaptation for Medical Image Segmentation" + arxiv_id: "1901.08211" + abstract: "This paper presents a novel unsupervised domain adaptation framework, called Synergistic Image and Feature Adaptation (SIFA), to effectively tackle the problem of domain shift. Domain adaptation has become an important and hot topic in recent studies on deep learning, aiming to recover performance degradation when applying the neural networks to new testing domains. Our proposed SIFA is an elegant learning diagram which presents synergistic fusion of adaptations from both image and feature perspectives. In particular, we simultaneously transform the appearance of images across domains and enhance domain-invariance of the extracted features towards the segmentation task. The feature encoder layers are shared by both perspectives to grasp their mutual benefits during the end-to-end learning procedure. Without using any annotation from the target domain, the learning of our unified model is guided by adversarial losses, with multiple discriminators employed from various aspects. We have extensively validated our method with a challenging application of cross-modality medical image segmentation of cardiac structures. Experimental results demonstrate that our SIFA model recovers the degraded performance from 17.2% to 73.0%, and outperforms the state-of-the-art methods by a significant margin." + pub_date: { + seconds: 1548288000 + } + authors: "Cheng Chen" + authors: "Qi Dou" + authors: "Hao Chen" + authors: "Jing Qin" + authors: "Pheng-Ann Heng" + repositories: { + is_official: true + url: "https://github.com/cchen-cc/SIFA" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 153 + } + } + video: { + video_id: "sR7hBJGpwQo" + video_title: "PR-159: SIFA: Towards Cross- Modality Domain Adaptation for Medical Image Segmentation" + number_of_likes: 10 + number_of_views: 476 + published_date: { + seconds: 1557132628 + } + uploader: "Sunghoon Joo" + } + } +} +pr_id_to_video: { + key: 160 + value: { + pr_id: 160 + papers: { + paper_id: "glomo-unsupervisedly-learned-relational" + title: "GLoMo: Unsupervisedly Learned Relational Graphs as Transferable Representations" + arxiv_id: "1806.05662" + abstract: "Modern deep transfer learning approaches have mainly focused on learning\ngeneric feature vectors from one task that are transferable to other tasks,\nsuch as word embeddings in language and pretrained convolutional features in\nvision. 
However, these approaches usually transfer unary features and largely\nignore more structured graphical representations. This work explores the\npossibility of learning generic latent relational graphs that capture\ndependencies between pairs of data units (e.g., words or pixels) from\nlarge-scale unlabeled data and transferring the graphs to downstream tasks. Our\nproposed transfer learning framework improves performance on various tasks\nincluding question answering, natural language inference, sentiment analysis,\nand image classification. We also show that the learned graphs are generic\nenough to be transferred to different embeddings on which the graphs have not\nbeen trained (including GloVe embeddings, ELMo embeddings, and task-specific\nRNN hidden unit), or embedding-free units such as image pixels." + pub_date: { + seconds: 1528934400 + } + authors: "Zhilin Yang" + authors: "Jake Zhao" + authors: "Bhuwan Dhingra" + authors: "Kaiming He" + authors: "William W. Cohen" + authors: "Ruslan Salakhutdinov" + authors: "Yann LeCun" + repositories: { + url: "https://github.com/YJHMITWEB/GLoMo-tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 14 + description: "This is a tensorflow implementation of 2018 NIPS paper: [GLoMo: Unsupervisedly Learned Relational Graphs as Transferable Representations.]" + } + methods: { + name: "ELMo" + full_name: "ELMo" + description: "**Embeddings from Language Models**, or **ELMo**, is a type of deep contextualized word representation that models both (1) complex characteristics of word use (e.g., syntax and semantics), and (2) how these uses vary across linguistic contexts (i.e., to model polysemy). Word vectors are learned functions of the internal states of a deep bidirectional language model (biLM), which is pre-trained on a large text corpus.\r\n\r\nA biLM combines both a forward and backward LM. ELMo jointly maximizes the log likelihood of the forward and backward directions. To add ELMo to a supervised model, we freeze the weights of the biLM and then concatenate the ELMo vector $\\textbf{ELMO}^{task}_k$ with $\\textbf{x}_k$ and pass the ELMO enhanced representation $[\\textbf{x}_k; \\textbf{ELMO}^{task}_k]$ into the task RNN. Here $\\textbf{x}_k$ is a context-independent token representation for each token position. \r\n\r\nImage Source: [here](https://medium.com/@duyanhnguyen_38925/create-a-strong-text-classification-with-the-help-from-elmo-e90809ba29da)" + } + methods: { + name: "Tanh Activation" + full_name: "Tanh Activation" + description: "**Tanh Activation** is an activation function used for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$\r\n\r\nHistorically, the tanh function became preferred over the [sigmoid function](https://paperswithcode.com/method/sigmoid-activation) as it gave better performance for multi-layer neural networks. But it did not solve the vanishing gradient problem that sigmoids suffered, which was tackled more effectively with the introduction of [ReLU](https://paperswithcode.com/method/relu) activations.\r\n\r\nImage Source: [Junxi Feng](https://www.researchgate.net/profile/Junxi_Feng)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "GloVe" + full_name: "GloVe Embeddings" + description: "**GloVe Embeddings** are a type of word embedding that encode the co-occurrence probability ratio between two words as vector differences. GloVe uses a weighted least squares objective $J$ that minimizes the difference between the dot product of the vectors of two words and the logarithm of their number of co-occurrences:\r\n\r\n$$ J=\\sum\\_{i, j=1}^{V}f\\left(𝑋\\_{i j}\\right)(w^{T}\\_{i}\\tilde{w}_{j} + b\\_{i} + \\tilde{b}\\_{j} - \\log{𝑋}\\_{ij})^{2} $$\r\n\r\nwhere $w\\_{i}$ and $b\\_{i}$ are the word vector and bias respectively of word $i$, $\\tilde{w}_{j}$ and $b\\_{j}$ are the context word vector and bias respectively of word $k$, $X\\_{ij}$ is the number of times word $i$ occurs in the context of word $j$, and $f$ is a weighting function that assigns lower weights to rare and frequent co-occurrences." + } + methods: { + name: "BiLSTM" + full_name: "Bidirectional LSTM" + description: "A **Bidirectional LSTM**, or **biLSTM**, is a sequence processing model that consists of two LSTMs: one taking the input in a forward direction, and the other in a backwards direction. BiLSTMs effectively increase the amount of information available to the network, improving the context available to the algorithm (e.g. knowing what words immediately follow *and* precede a word in a sentence).\r\n\r\nImage Source: Modelling Radiological Language with Bidirectional Long Short-Term Memory Networks, Cornegruta et al" + } + methods: { + name: "LSTM" + full_name: "Long Short-Term Memory" + description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + } + video: { + video_id: "V9KusGzsx10" + video_title: "PR-160: GLoMo Unsupervised Learning of Transferable Relational Graph" + number_of_likes: 13 + number_of_views: 623 + published_date: { + seconds: 1557076158 + } + uploader: "Doyup Lee" + } + } +} +pr_id_to_video: { + key: 161 + value: { + pr_id: 161 + papers: { + paper_id: "transformer-xl-attentive-language-models" + title: "Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context" + arxiv_id: "1901.02860" + abstract: "Transformers have a potential of learning longer-term dependency, but are limited by a fixed-length context in the setting of language modeling. 
We propose a novel neural architecture Transformer-XL that enables learning dependency beyond a fixed length without disrupting temporal coherence. It consists of a segment-level recurrence mechanism and a novel positional encoding scheme. Our method not only enables capturing longer-term dependency, but also resolves the context fragmentation problem. As a result, Transformer-XL learns dependency that is 80% longer than RNNs and 450% longer than vanilla Transformers, achieves better performance on both short and long sequences, and is up to 1,800+ times faster than vanilla Transformers during evaluation. Notably, we improve the state-of-the-art results of bpc/perplexity to 0.99 on enwiki8, 1.08 on text8, 18.3 on WikiText-103, 21.8 on One Billion Word, and 54.5 on Penn Treebank (without finetuning). When trained only on WikiText-103, Transformer-XL manages to generate reasonably coherent, novel text articles with thousands of tokens. Our code, pretrained models, and hyperparameters are available in both Tensorflow and PyTorch." + pub_date: { + seconds: 1546992000 + } + authors: "Zihang Dai" + authors: "Zhilin Yang" + authors: "Yiming Yang" + authors: "Jaime Carbonell" + authors: "Quoc V. Le" + authors: "Ruslan Salakhutdinov" + repositories: { + url: "https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/language_model/transformer-xl" + framework: FRAMEWORK_OTHERS + number_of_stars: 1379 + description: "An NLP library with Awesome pre-trained Transformer models and easy-to-use interface, supporting wide-range of NLP tasks from research to industrial applications." + } + repositories: { + url: "https://github.com/huggingface/transformers" + framework: FRAMEWORK_PYTORCH + number_of_stars: 47629 + description: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." + } + repositories: { + url: "https://github.com/facebookresearch/code-prediction-transformer" + framework: FRAMEWORK_PYTORCH + number_of_stars: 62 + description: "This repo will contain replication package for the paper \"Feeding Trees to Transformers for Code Completion\"" + } + repositories: { + url: "https://github.com/lab-ml/nn/tree/master/labml_nn/transformers/xl" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3069 + description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + } + repositories: { + url: "https://github.com/Jmkernes/PAR-Transformer-XL" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "An implementation of the Pay Attention when Required transformer: https://arxiv.org/pdf/2009.04534.pdf" + } + repositories: { + url: "https://github.com/sooftware/conformer" + framework: FRAMEWORK_PYTORCH + number_of_stars: 123 + description: "PyTorch implementation of \"Conformer: Convolution-augmented Transformer for Speech Recognition\" (INTERSPEECH 2020)" + } + repositories: { + url: "https://github.com/sooftware/nlp-attentions" + framework: FRAMEWORK_PYTORCH + number_of_stars: 51 + description: "PyTorch implementation of some attentions for Deep Learning Researchers. 
" + } + repositories: { + url: "https://github.com/sooftware/Attention-Implementation" + framework: FRAMEWORK_PYTORCH + number_of_stars: 51 + description: "PyTorch implementation of some attentions for Deep Learning Researchers. " + } + repositories: { + url: "https://github.com/sh951011/Attention-Implementation" + framework: FRAMEWORK_PYTORCH + number_of_stars: 51 + description: "PyTorch implementation of some attentions for Deep Learning Researchers. " + } + repositories: { + url: "https://github.com/cedrickchee/pytorch-pretrained-BERT" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 + description: "PyTorch version of Google AI's BERT model with script to load Google's pre-trained models" + } + methods: { + name: "Variational Dropout" + full_name: "Variational Dropout" + description: "**Variational Dropout** is a regularization technique based on [dropout](https://paperswithcode.com/method/dropout), but uses a variational inference grounded approach. In Variational Dropout, we repeat the same dropout mask at each time step for both inputs, outputs, and recurrent layers (drop the same network units at each time step). This is in contrast to ordinary Dropout where different dropout masks are sampled at each time step for the inputs and outputs alone." + } + methods: { + name: "Adaptive Input Representations" + full_name: "Adaptive Input Representations" + description: "**Adaptive Input Embeddings** extend the adaptive softmax to input word representations. The factorization assigns more capacity to frequent words and reduces the capacity for less frequent words with the benefit of reducing overfitting to rare words." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Cosine Annealing" + full_name: "Cosine Annealing" + description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. 
The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\eta\_{t} = \eta\_{min}^{i} + \frac{1}{2}\left(\eta\_{max}^{i}-\eta\_{min}^{i}\right)\left(1+\cos\left(\frac{T\_{cur}}{T\_{i}}\pi\right)\right)\r\n$$\r\n\r\nwhere $\eta\_{min}^{i}$ and $\eta\_{max}^{i}$ are ranges for the learning rate, and $T\_{cur}$ accounts for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" + } + methods: { + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allow for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \text{MultiHead}\left(\textbf{Q}, \textbf{K}, \textbf{V}\right) = \left[\text{head}\_{1},\dots,\text{head}\_{h}\right]\textbf{W}_{0}$$\r\n\r\n$$\text{where} \text{ head}\_{i} = \text{Attention} \left(\textbf{Q}\textbf{W}\_{i}^{Q}, \textbf{K}\textbf{W}\_{i}^{K}, \textbf{V}\textbf{W}\_{i}^{V} \right) $$\r\n\r\nAbove $\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\mathcal{F}({x}):=\mathcal{H}({x})-{x}$. The original mapping is recast into $\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Adaptive Softmax" + full_name: "Adaptive Softmax" + description: "**Adaptive Softmax** is a speedup technique for the computation of probability distributions over words. The adaptive softmax is inspired by the class-based hierarchical softmax, where the word classes are built to minimize the computation time. 
Adaptive softmax achieves efficiency by explicitly taking into account the computation time of matrix-multiplication on parallel systems and combining it with a few important observations, namely keeping a shortlist of frequent words in the root node\r\nand reducing the capacity of rare words." + } + methods: { + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." + } + methods: { + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + } + video: { + video_id: "lSTljZy8ag4" + video_title: "PR-161: Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context" + number_of_likes: 36 + number_of_views: 2218 + published_date: { + seconds: 1557744220 + } + uploader: "박성남" + } + } +} +pr_id_to_video: { + key: 162 + value: { + pr_id: 162 + papers: { + paper_id: "deeppermnet-visual-permutation-learning" + title: "DeepPermNet: Visual Permutation Learning" + arxiv_id: "1704.02729" + abstract: "We present a principled approach to uncover the structure of visual data by\nsolving a novel deep learning task coined visual permutation learning. 
The goal\nof this task is to find the permutation that recovers the structure of data\nfrom shuffled versions of it. In the case of natural images, this task boils\ndown to recovering the original image from patches shuffled by an unknown\npermutation matrix. Unfortunately, permutation matrices are discrete, thereby\nposing difficulties for gradient-based methods. To this end, we resort to a\ncontinuous approximation of these matrices using doubly-stochastic matrices\nwhich we generate from standard CNN predictions using Sinkhorn iterations.\nUnrolling these iterations in a Sinkhorn network layer, we propose DeepPermNet,\nan end-to-end CNN model for this task. The utility of DeepPermNet is\ndemonstrated on two challenging computer vision problems, namely, (i) relative\nattributes learning and (ii) self-supervised representation learning. Our\nresults show state-of-the-art performance on the Public Figures and OSR\nbenchmarks for (i) and on the classification and segmentation tasks on the\nPASCAL VOC dataset for (ii)." + pub_date: { + seconds: 1491782400 + } + authors: "Rodrigo Santa Cruz" + authors: "Basura Fernando" + authors: "Anoop Cherian" + authors: "Stephen Gould" + } + video: { + video_id: "AqStpR29lTA" + video_title: "PR-162: DeepPermNet: Visual Permutation Learning" + number_of_likes: 2 + number_of_views: 311 + published_date: { + seconds: 1557675917 + } + uploader: "강민국" + } + } +} +pr_id_to_video: { + key: 163 + value: { + pr_id: 163 + papers: { + paper_id: "explainable-cnn-attention-networks-c" + title: "Explainable CNN-attention Networks (C-Attention Network) for Automated Detection of Alzheimer's Disease" + arxiv_id: "2006.14135" + abstract: "In this work, we propose three explainable deep learning architectures to automatically detect patients with Alzheimer's disease based on their language abilities. The architectures use: (1) only the part-of-speech features; (2) only language embedding features and (3) both of these feature classes via a unified architecture. We use self-attention mechanisms and interpretable 1-dimensional Convolutional Neural Network (CNN) to generate two types of explanations of the model's action: intra-class explanation and inter-class explanation. The intra-class explanation captures the relative importance of each of the different features in that class, while the inter-class explanation captures the relative importance between the classes. Note that although we have considered two classes of features in this paper, the architecture is easily expandable to more classes because of its modularity. Extensive experimentation and comparison with several recent models show that our method outperforms these methods with an accuracy of 92.2% and F1 score of 0.952 on the DementiaBank dataset while being able to generate explanations. We show by examples how to generate these explanations using attention values." + pub_date: { + seconds: 1593043200 + } + authors: "Ning Wang" + authors: "Mingxuan Chen" + authors: "K. P. 
Subbalakshmi" + } + video: { + video_id: "Dvi5_YC8Yts" + video_title: "PR-163: CNN Attention Networks" + number_of_likes: 125 + number_of_views: 8274 + published_date: { + seconds: 1558274434 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 164 + value: { + pr_id: 164 + papers: { + paper_id: "infovae-information-maximizing-variational" + title: "InfoVAE: Information Maximizing Variational Autoencoders" + arxiv_id: "1706.02262" + abstract: "A key advance in learning generative models is the use of amortized inference\ndistributions that are jointly trained with the models. We find that existing\ntraining objectives for variational autoencoders can lead to inaccurate\namortized inference distributions and, in some cases, improving the objective\nprovably degrades the inference quality. In addition, it has been observed that\nvariational autoencoders tend to ignore the latent variables when combined with\na decoding distribution that is too flexible. We again identify the cause in\nexisting training criteria and propose a new class of objectives (InfoVAE) that\nmitigate these problems. We show that our model can significantly improve the\nquality of the variational posterior and can make effective use of the latent\nfeatures regardless of the flexibility of the decoding distribution. Through\nextensive qualitative and quantitative analyses, we demonstrate that our models\noutperform competing approaches on multiple performance metrics." + pub_date: { + seconds: 1496793600 + } + authors: "Shengjia Zhao" + authors: "Jiaming Song" + authors: "Stefano Ermon" + repositories: { + url: "https://github.com/zacheberhart/Convolutional-Disentangled-Variational-Autoencoder" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "A Convolutional β-VAE in PyTorch based loosely off of the Conv VAE used in the World Models research paper." + } + repositories: { + url: "https://github.com/JakobHavtorn/vae" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "VAE in PyTorch" + } + repositories: { + url: "https://github.com/Saswatm123/MMD-VAE" + framework: FRAMEWORK_PYTORCH + number_of_stars: 34 + description: "Pytorch implementation of Maximum Mean Discrepancy Variational Autoencoder, a member of the InfoVAE family that maximizes Mutual Information between the Isotropic Gaussian Prior (as the latent space) and the Data Distribution." + } + repositories: { + url: "https://github.com/AntixK/PyTorch-VAE" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2111 + description: "A Collection of Variational Autoencoders (VAE) in PyTorch." + } + repositories: { + url: "https://github.com/zacheberhart/Maximum-Mean-Discrepancy-Variational-Autoencoder" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 37 + description: "A PyTorch implementation of the MMD-VAE, an Information-Maximizing Variational Autoencoder (InfoVAE) based off of the TensorFlow implementation published by the author of the original InfoVAE paper." 
+ } + } + video: { + video_id: "29QcXLoYC60" + video_title: "PR-164: InfoVAE: Balancing Learning and Inference in Variational Autoencoders" + number_of_views: 593 + published_date: { + seconds: 1558883112 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 165 + value: { + pr_id: 165 + papers: { + paper_id: "few-shot-adversarial-learning-of-realistic" + title: "Few-Shot Adversarial Learning of Realistic Neural Talking Head Models" + arxiv_id: "1905.08233" + abstract: "Several recent works have shown how highly realistic human head images can be obtained by training convolutional neural networks to generate them. In order to create a personalized talking head model, these works require training on a large dataset of images of a single person. However, in many practical scenarios, such personalized talking head models need to be learned from a few image views of a person, potentially even a single image. Here, we present a system with such few-shot capability. It performs lengthy meta-learning on a large dataset of videos, and after that is able to frame few- and one-shot learning of neural talking head models of previously unseen people as adversarial training problems with high capacity generators and discriminators. Crucially, the system is able to initialize the parameters of both the generator and the discriminator in a person-specific way, so that training can be based on just a few images and done quickly, despite the need to tune tens of millions of parameters. We show that such an approach is able to learn highly realistic and personalized talking head models of new people and even portrait paintings." + pub_date: { + seconds: 1558310400 + } + authors: "Egor Zakharov" + authors: "Aliaksandra Shysheya" + authors: "Egor Burkov" + authors: "Victor Lempitsky" + repositories: { + url: "https://github.com/krisrjohnson/Realistic-Neural-Talking-Head-Models" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/Ierezell/PapierFewShot" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Reimplementation in pytorch of the paper https://arxiv.org/pdf/1905.08233.pdf" + } + repositories: { + url: "https://github.com/times2049/talkinghead" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/ZVK/Talking-Heads" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + } + repositories: { + url: "https://github.com/shoutOutYangJie/Few-Shot-Adversarial-Learning-for-face-swap" + framework: FRAMEWORK_PYTORCH + number_of_stars: 123 + description: "This is a unofficial re-implementation of the paper \"Few-Shot Adversarial Learning of Realistic Neural Talking Head Models\"" + } + repositories: { + url: "https://github.com/vincent-thevenin/Realistic-Neural-Talking-Head-Models" + framework: FRAMEWORK_PYTORCH + number_of_stars: 643 + description: "My implementation of Few-Shot Adversarial Learning of Realistic Neural Talking Head Models (Egor Zakharov et al.)." 
+ } + repositories: { + url: "https://github.com/ZVK/talking_heads" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + } + repositories: { + url: "https://github.com/grey-eye/talking-heads" + framework: FRAMEWORK_PYTORCH + number_of_stars: 507 + description: "Our implementation of \"Few-Shot Adversarial Learning of Realistic Neural Talking Head Models\" (Egor Zakharov et al.)" + } + } + video: { + video_id: "4pY_6VG4npc" + video_title: "PR-165: Few-Shot Adversarial Learning of Realistic Neural Talking Head Models" + number_of_likes: 40 + number_of_views: 2879 + published_date: { + seconds: 1558879643 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 166 + value: { + pr_id: 166 + papers: { + paper_id: "nas-fpn-learning-scalable-feature-pyramid" + title: "NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection" + arxiv_id: "1904.07392" + abstract: "Current state-of-the-art convolutional architectures for object detection are\nmanually designed. Here we aim to learn a better architecture of feature\npyramid network for object detection. We adopt Neural Architecture Search and\ndiscover a new feature pyramid architecture in a novel scalable search space\ncovering all cross-scale connections. The discovered architecture, named\nNAS-FPN, consists of a combination of top-down and bottom-up connections to\nfuse features across scales. NAS-FPN, combined with various backbone models in\nthe RetinaNet framework, achieves better accuracy and latency tradeoff compared\nto state-of-the-art object detection models. NAS-FPN improves mobile detection\naccuracy by 2 AP compared to state-of-the-art SSDLite with MobileNetV2 model in\n[32] and achieves 48.3 AP which surpasses Mask R-CNN [10] detection accuracy\nwith less computation time." + pub_date: { + seconds: 1555372800 + } + authors: "Golnaz Ghiasi" + authors: "Tsung-Yi Lin" + authors: "Ruoming Pang" + authors: "Quoc V. Le" + repositories: { + url: "https://github.com/open-mmlab/mmdetection" + framework: FRAMEWORK_PYTORCH + number_of_stars: 15379 + description: "OpenMMLab Detection Toolbox and Benchmark" + } + repositories: { + url: "https://github.com/tensorflow/tpu/tree/master/models/official/detection" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4338 + description: "Reference models and tools for Cloud TPUs." + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. 
Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Bottleneck Residual Block" + full_name: "Bottleneck Residual Block" + description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + methods: { + name: "RoIAlign" + full_name: "RoIAlign" + description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." + } + methods: { + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. 
This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "MobileNetV2" + full_name: "MobileNetV2" + description: "**MobileNetV2** is a convolutional neural network architecture that seeks to perform well on mobile devices. It is based on an inverted residual structure where the residual connections are between the bottleneck layers. The intermediate expansion layer uses lightweight depthwise convolutions to filter features as a source of non-linearity. As a whole, the architecture of MobileNetV2 contains the initial fully convolution layer with 32 filters, followed by 19 residual bottleneck layers." + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + } + video: { + video_id: "FAAt0jejWOA" + video_title: "PR-166: NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection" + number_of_likes: 15 + number_of_views: 2115 + published_date: { + seconds: 1560917270 + } + uploader: "Taekmin Kim" + } + } +} +pr_id_to_video: { + key: 167 + value: { + pr_id: 167 + papers: { + paper_id: "interpretability-beyond-feature-attribution" + title: "Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV)" + arxiv_id: "1711.11279" + abstract: "The interpretation of deep learning models is a challenge due to their size,\ncomplexity, and often opaque internal state. In addition, many systems, such as\nimage classifiers, operate on low-level features rather than high-level\nconcepts. To address these challenges, we introduce Concept Activation Vectors\n(CAVs), which provide an interpretation of a neural net's internal state in\nterms of human-friendly concepts. The key idea is to view the high-dimensional\ninternal state of a neural net as an aid, not an obstacle. We show how to use\nCAVs as part of a technique, Testing with CAVs (TCAV), that uses directional\nderivatives to quantify the degree to which a user-defined concept is important\nto a classification result--for example, how sensitive a prediction of \"zebra\"\nis to the presence of stripes. 
Using the domain of image classification as a\ntesting ground, we describe how CAVs may be used to explore hypotheses and\ngenerate insights for a standard image classification network as well as a\nmedical application." + pub_date: { + seconds: 1512000000 + } + authors: "Been Kim" + authors: "Martin Wattenberg" + authors: "Justin Gilmer" + authors: "Carrie Cai" + authors: "James Wexler" + authors: "Fernanda Viegas" + authors: "Rory Sayres" + repositories: { + url: "https://github.com/jwendyr/tcav" + framework: FRAMEWORK_TENSORFLOW + description: "tcav" + } + repositories: { + url: "https://github.com/giovannimaffei/concept_activation_vectors" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Simple implementation of \"Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV)\", Been Kim et al., 2017 " + } + repositories: { + url: "https://github.com/medgift/iMIMIC-RCVs" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 14 + description: "This repository contains the code for implementing Bidirectional Relevance scores for Digital Histopathology, which was used for the results in the iMIMIC workshop paper: Regression Concept Vectors for Bidirectional Explanations in Histopathology" + } + repositories: { + url: "https://github.com/maragraziani/iMIMIC-RCVs" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "This repository contains the code for implementing Bidirectional Relevance scores for Digital Histopathology, which was used for the results in the iMIMIC workshop paper: Regression Concept Vectors for Bidirectional Explanations in Histopathology" + } + repositories: { + url: "https://github.com/fursovia/tcav_nlp" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "\"Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV)\" paper implementation" + } + repositories: { + url: "https://github.com/pnxenopoulos/cav-keras" + framework: FRAMEWORK_OTHERS + number_of_stars: 9 + description: "Concept activation vectors for Keras" + } + repositories: { + url: "https://github.com/tensorflow/tcav" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 466 + description: "Code for the TCAV ML interpretability project" + } + repositories: { + url: "https://github.com/soumyadip1995/TCAV" + framework: FRAMEWORK_OTHERS + description: " ⚙📲Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV)" + } + } + video: { + video_id: "-y0oghbEHMM" + video_title: "PR-167: Interpretability Beyond Feature Attribution: Testing with Concept Activation Vector (TCAV)" + number_of_likes: 4 + number_of_views: 769 + published_date: { + seconds: 1559486974 + } + uploader: "Jinsung Yoon" + } + } +} +pr_id_to_video: { + key: 168 + value: { + pr_id: 168 + papers: { + paper_id: "few-shot-unsupervised-image-to-image" + title: "Few-Shot Unsupervised Image-to-Image Translation" + arxiv_id: "1905.01723" + abstract: "Unsupervised image-to-image translation methods learn to map images in a given class to an analogous image in a different class, drawing on unstructured (non-registered) datasets of images. While remarkably successful, current methods require access to many images in both source and destination classes at training time. We argue this greatly limits their use. 
Drawing inspiration from the human capability of picking up the essence of a novel object from a small number of examples and generalizing from there, we seek a few-shot, unsupervised image-to-image translation algorithm that works on previously unseen target classes that are specified, at test time, only by a few example images. Our model achieves this few-shot generation capability by coupling an adversarial training scheme with a novel network design. Through extensive experimental validation and comparisons to several baseline methods on benchmark datasets, we verify the effectiveness of the proposed framework. Our implementation and datasets are available at https://github.com/NVlabs/FUNIT ." + pub_date: { + seconds: 1557014400 + } + authors: "Ming-Yu Liu" + authors: "Xun Huang" + authors: "Arun Mallya" + authors: "Tero Karras" + authors: "Timo Aila" + authors: "Jaakko Lehtinen" + authors: "Jan Kautz" + repositories: { + url: "https://github.com/samuelchassot/FUNIT" + framework: FRAMEWORK_PYTORCH + description: "Translate images to unseen domains in the test time with few example images." + } + repositories: { + url: "https://github.com/mkolodny/funit" + framework: FRAMEWORK_PYTORCH + } + repositories: { + is_official: true + url: "https://github.com/NVlabs/FUNIT" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1429 + description: "Translate images to unseen domains in the test time with few example images." + } + repositories: { + url: "https://github.com/taki0112/FUNIT-Tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 62 + description: "Simple Tensorflow implementation of \"Few-Shot Unsupervised Image-to-Image Translation\" (ICCV 2019)" + } + repositories: { + url: "https://github.com/chipsi/FUNIT" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/yaxingwang/SEMIT" + framework: FRAMEWORK_PYTORCH + number_of_stars: 38 + description: "Image to Image translation, image generataton, few shot learning" + } + repositories: { + url: "https://github.com/shaoanlu/fewshot-face-translation-GAN" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 720 + description: "Generative adversarial networks integrating modules from FUNIT and SPADE for face-swapping." + } + repositories: { + url: "https://github.com/sumfish/music-style-transfer" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/cleye/FUNIT-Fringe" + framework: FRAMEWORK_PYTORCH + description: "Using NVlabs FUNIT to make a photobooth transforming your face into an animal. Displayed at FRINGE Festival 2020" + } + } + video: { + video_id: "ANwAhuOeaiE" + video_title: "PR-168: Few Shot Unsupervised Image to Image Translation" + number_of_likes: 11 + number_of_views: 895 + published_date: { + seconds: 1560267339 + } + uploader: "정지원" + } + } +} +pr_id_to_video: { + key: 169 + value: { + pr_id: 169 + papers: { + paper_id: "efficientnet-rethinking-model-scaling-for" + title: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" + arxiv_id: "1905.11946" + abstract: "Convolutional Neural Networks (ConvNets) are commonly developed at a fixed resource budget, and then scaled up for better accuracy if more resources are available. In this paper, we systematically study model scaling and identify that carefully balancing network depth, width, and resolution can lead to better performance. 
Based on this observation, we propose a new scaling method that uniformly scales all dimensions of depth/width/resolution using a simple yet highly effective compound coefficient. We demonstrate the effectiveness of this method on scaling up MobileNets and ResNet. To go even further, we use neural architecture search to design a new baseline network and scale it up to obtain a family of models, called EfficientNets, which achieve much better accuracy and efficiency than previous ConvNets. In particular, our EfficientNet-B7 achieves state-of-the-art 84.3% top-1 accuracy on ImageNet, while being 8.4x smaller and 6.1x faster on inference than the best existing ConvNet. Our EfficientNets also transfer well and achieve state-of-the-art accuracy on CIFAR-100 (91.7%), Flowers (98.8%), and 3 other transfer learning datasets, with an order of magnitude fewer parameters. Source code is at https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet." + pub_date: { + seconds: 1559001600 + } + authors: "Mingxing Tan" + authors: "Quoc V. Le" + repositories: { + url: "https://github.com/jason90330/EdgeFinal" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/Tirth27/Skin-Cancer-Classification-using-Deep-Learning" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Classify Skin cancer from the skin lesion images using Image classification. The dataset for the project is obtained from the Kaggle SIIM-ISIC-Melanoma-Classification competition. " + } + repositories: { + url: "https://github.com/reyvaz/pneumothorax_detection" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Pneumothorax Disease Detection and Segmentation using X-Ray Images" + } + repositories: { + url: "https://github.com/reyvaz/steel-defect-segmentation" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "EfficientNet-Based Unet++ Model for Steel Defect Segmentation" + } + repositories: { + url: "https://github.com/gladwinyjh/FYP-2020" + framework: FRAMEWORK_TENSORFLOW + description: "Deep learning of brain images with EfficientNet" + } + repositories: { + url: "https://github.com/jaketae/mlp-mixer" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7 + description: "PyTorch implementation of MLP-Mixer: An all-MLP Architecture for Vision" + } + repositories: { + url: "https://github.com/lpirola13/flower-recognizer" + framework: FRAMEWORK_TENSORFLOW + description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." + } + repositories: { + url: "https://github.com/HyeonhoonLee/MAIC2021_Sleep" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "4th place in MAIC2021 Sleep AI Challenge (SleepingDragon)" + } + repositories: { + url: "https://github.com/lukemelas/EfficientNet-PyTorch" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6135 + description: "A PyTorch implementation of EfficientNet and EfficientNetV2 (coming soon!)" + } + repositories: { + url: "https://github.com/BobMcDear/pytorch-efficientnet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Bottleneck Residual Block" + full_name: "Bottleneck Residual Block" + description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. 
They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + methods: { + name: "AutoAugment" + full_name: "AutoAugment" + description: "**AutoAugment** is an automated approach to find data augmentation policies from data. It formulates the problem of finding the best augmentation policy as a discrete search problem. It consists of two components: a search algorithm and a search space. \r\n\r\nAt a high level, the search algorithm (implemented as a controller RNN) samples a data augmentation policy $S$, which has information about what image processing operation to use, the probability of using the operation in each batch, and the magnitude of the operation. The policy $S$ is used to train a neural network with a fixed architecture, whose validation accuracy $R$ is sent back to update the controller. Since $R$ is not differentiable, the controller will be updated by policy gradient methods. \r\n\r\nThe operations used are from PIL, a popular Python image library: all functions in PIL that accept an image as input and output an image. It additionally uses two other augmentation techniques: Cutout and SamplePairing. The operations searched over are ShearX/Y, TranslateX/Y, Rotate, AutoContrast, Invert, Equalize, Solarize, Posterize, Contrast, Color, Brightness, Sharpness, Cutout and Sample Pairing." + } + methods: { + name: "Squeeze-and-Excitation Block" + full_name: "Squeeze-and-Excitation Block" + description: "The **Squeeze-and-Excitation Block** is an architectural unit designed to improve the representational power of a network by enabling it to perform dynamic channel-wise feature recalibration. The process is:\r\n\r\n- The block has a convolutional block as an input.\r\n- Each channel is \"squeezed\" into a single numeric value using average pooling.\r\n- A dense layer followed by a ReLU adds non-linearity and output channel complexity is reduced by a ratio.\r\n- Another dense layer followed by a sigmoid gives each channel a smooth gating function.\r\n- Finally, we weight each feature map of the convolutional block based on the side network; the \"excitation\"." + } + methods: { + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. 
It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "Stochastic Depth" + full_name: "Stochastic Depth" + description: "**Stochastic Depth** aims to shrink the depth of a network during training, while\r\nkeeping it unchanged during testing. This is achieved by randomly dropping entire [ResBlocks](https://paperswithcode.com/method/residual-block) during training and bypassing their transformations through skip connections. \r\n\r\nLet $b\_{l} \in$ {$0, 1$} denote a Bernoulli random variable, which indicates whether the $l$th ResBlock is active ($b\_{l} = 1$) or inactive ($b\_{l} = 0$). Further, let us denote the “survival” probability of ResBlock $l$ as $p\_{l} = \text{Pr}\left(b\_{l} = 1\right)$. With this definition we can bypass the $l$th ResBlock by multiplying its function $f\_{l}$ with $b\_{l}$ and we extend the update rule to:\r\n\r\n$$ H\_{l} = \text{ReLU}\left(b\_{l}f\_{l}\left(H\_{l-1}\right) + \text{id}\left(H\_{l-1}\right)\right) $$\r\n\r\nIf $b\_{l} = 1$, this reduces to the original ResNet update and this ResBlock remains unchanged. If $b\_{l} = 0$, the ResBlock reduces to the identity function, $H\_{l} = \text{id}\left(H\_{l-1}\right)$." + } + } + video: { + video_id: "Vhz0quyvR7I" + video_title: "PR-169: EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" + number_of_likes: 99 + number_of_views: 6335 + published_date: { + seconds: 1560496231 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 170 + value: { + pr_id: 170 + papers: { + paper_id: "deep-residual-learning-for-image-recognition" + title: "Deep Residual Learning for Image Recognition" + arxiv_id: "1512.03385" + abstract: "Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation."
+ pub_date: { + seconds: 1449705600 + } + authors: "Kaiming He" + authors: "Xiangyu Zhang" + authors: "Shaoqing Ren" + authors: "Jian Sun" + repositories: { + url: "https://github.com/tensorflow/models/tree/master/research/deeplab" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70341 + description: "Models and examples built with TensorFlow" + } + repositories: { + url: "https://github.com/vinod377/STN-OCR" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Implementation of \"STN-OCR: A single Neural Network for Text Detection and Text Recognition\" in natural Scenes by Christian Bartz." + } + repositories: { + url: "https://github.com/facebookresearch/pycls" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1597 + description: "Codebase for Image Classification Research, written in PyTorch." + } + repositories: { + url: "https://github.com/MarkHershey/arxiv-dl" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Command-line arXiv.org Papers Downloader" + } + repositories: { + url: "https://github.com/FrancescoSaverioZuppichini/ResNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 94 + description: "Clean, scalable and easy to use ResNet implementation in Pytorch" + } + repositories: { + url: "https://github.com/Masao-Taketani/FOTS_OCR" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 41 + description: "TensorFlow Implementation of FOTS, Fast Oriented Text Spotting with a Unified Network." + } + repositories: { + url: "https://github.com/amogh7joshi/plant-health-detection" + framework: FRAMEWORK_TENSORFLOW + description: "Detecting plant health using neural networks." + } + repositories: { + url: "https://github.com/tiagoCuervo/JapaNet" + framework: FRAMEWORK_TENSORFLOW + description: "Detection and classification of Kuzushiji characters for the Kuzushiji Recognition Kaggle challenge using CenterNet as detector and multiple classifiers" + } + repositories: { + url: "https://github.com/pytorch/vision" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9309 + description: "Datasets, Transforms and Models specific to Computer Vision" + } + repositories: { + url: "https://github.com/winycg/MCL-OKD" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "Multi-view contrastive learning for online knowledge distillation (MCL-OKD)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. 
Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Step Decay" + full_name: "Step Decay" + description: "**Step Decay** is a learning rate schedule that drops the learning rate by a factor every few epochs, where the number of epochs is a hyperparameter.\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" + } + methods: { + name: "SGD with Momentum" + full_name: "SGD with Momentum" + description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. 
It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Bottleneck Residual Block" + full_name: "Bottleneck Residual Block" + description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. 
Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + } + video: { + video_id: "7fSgqlC7Wdo" + video_title: "PR-170: ResNet - Deep Residual Learning for Image Recognition" + number_of_likes: 16 + number_of_views: 1730 + published_date: { + seconds: 1565744287 + } + uploader: "Young Seok Kim" + } + } +} +pr_id_to_video: { + key: 171 + value: { + pr_id: 171 + papers: { + paper_id: "large-margin-softmax-loss-for-convolutional" + title: "Large-Margin Softmax Loss for Convolutional Neural Networks" + arxiv_id: "1612.02295" + abstract: "Cross-entropy loss together with softmax is arguably one of the most common\nused supervision components in convolutional neural networks (CNNs). Despite\nits simplicity, popularity and excellent performance, the component does not\nexplicitly encourage discriminative learning of features. In this paper, we\npropose a generalized large-margin softmax (L-Softmax) loss which explicitly\nencourages intra-class compactness and inter-class separability between learned\nfeatures. Moreover, L-Softmax not only can adjust the desired margin but also\ncan avoid overfitting. We also show that the L-Softmax loss can be optimized by\ntypical stochastic gradient descent. Extensive experiments on four benchmark\ndatasets demonstrate that the deeply-learned features with L-softmax loss\nbecome more discriminative, hence significantly boosting the performance on a\nvariety of visual classification and verification tasks." + pub_date: { + seconds: 1481068800 + } + authors: "Weiyang Liu" + authors: "Yandong Wen" + authors: "Zhiding Yu" + authors: "Meng Yang" + repositories: { + is_official: true + url: "https://github.com/wy1iu/LargeMargin_Softmax_Loss" + framework: FRAMEWORK_PYTORCH + number_of_stars: 321 + description: "Implementation for in ICML'16." + } + repositories: { + url: "https://github.com/amirhfarzaneh/lsoftmax-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 141 + description: "The Pytorch Implementation of L-Softmax" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + } + video: { + video_id: "7TugLIfExKM" + video_title: "PR-171: Large margin softmax loss for Convolutional Neural Networks" + number_of_likes: 4 + number_of_views: 1051 + published_date: { + seconds: 1561534996 + } + uploader: "visionNoob" + } + } +} +pr_id_to_video: { + key: 172 + value: { + pr_id: 172 + papers: { + paper_id: "generalized-intersection-over-union-a-metric" + title: "Generalized Intersection over Union: A Metric and A Loss for Bounding Box Regression" + arxiv_id: "1902.09630" + abstract: "Intersection over Union (IoU) is the most popular evaluation metric used in\nthe object detection benchmarks. However, there is a gap between optimizing the\ncommonly used distance losses for regressing the parameters of a bounding box\nand maximizing this metric value. The optimal objective for a metric is the\nmetric itself. In the case of axis-aligned 2D bounding boxes, it can be shown\nthat $IoU$ can be directly used as a regression loss. 
However, $IoU$ has a\nplateau making it infeasible to optimize in the case of non-overlapping\nbounding boxes. In this paper, we address the weaknesses of $IoU$ by\nintroducing a generalized version as both a new loss and a new metric. By\nincorporating this generalized $IoU$ ($GIoU$) as a loss into the state-of-the\nart object detection frameworks, we show a consistent improvement on their\nperformance using both the standard, $IoU$ based, and new, $GIoU$ based,\nperformance measures on popular object detection benchmarks such as PASCAL VOC\nand MS COCO." + pub_date: { + seconds: 1551052800 + } + authors: "Hamid Rezatofighi" + authors: "Nathan Tsoi" + authors: "JunYoung Gwak" + authors: "Amir Sadeghian" + authors: "Ian Reid" + authors: "Silvio Savarese" + repositories: { + url: "https://github.com/AnselmC/bamot" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Bundle Adjustment for Multiple Object Tracking" + } + repositories: { + url: "https://github.com/sremes/a2d2" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } + repositories: { + url: "https://github.com/JaryHuang/awesome_SSD_FPN_GIoU" + framework: FRAMEWORK_PYTORCH + number_of_stars: 97 + description: "This repository carries out some paper recurring work" + } + repositories: { + url: "https://github.com/OFRIN/Tensorflow_GIoU" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 11 + description: "Generalized Intersection Over Union: A Metric and a Loss for Bounding Box Regression (CVPR2019)" + } + repositories: { + url: "https://github.com/RuiminChen/GIou_loss_caffe" + framework: FRAMEWORK_OTHERS + number_of_stars: 39 + description: "Caffe version Generalized & Distance & Complete Iou loss Implementation for Faster RCNN/FPN bbox regression" + } + repositories: { + url: "https://github.com/RuiminChen/GIouloss_CIouloss_caffe" + framework: FRAMEWORK_OTHERS + number_of_stars: 39 + description: "Caffe version Generalized & Distance & Complete Iou loss Implementation for Faster RCNN/FPN bbox regression" + } + repositories: { + url: "https://github.com/LinRiver/YOLOv3-on-LISA-Traffic-Sign-Detection-with-darknet" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "This project is to improve YOLOv3 performance by GIOU instead of IOU and the integration of conv and batch_normalization layers" + } + repositories: { + url: "https://github.com/kalubin-awym/GIoU-loss-for-RetinaNet" + framework: FRAMEWORK_OTHERS + number_of_stars: 11 + description: "Change smooth L1 loss to GIoU loss for RetinaNet" + } + } + video: { + video_id: "ENZBhDx0kqM" + video_title: "PR-172: Generalized Intersection over Union: A Metric and A Loss for Bounding Box Regression" + number_of_likes: 30 + number_of_views: 1823 + published_date: { + seconds: 1561353991 + } + uploader: "Soyeon Kim" + } + } +} +pr_id_to_video: { + key: 173 + value: { + pr_id: 173 + papers: { + paper_id: "automatic-chemical-design-using-a-data-driven" + title: "Automatic chemical design using a data-driven continuous representation of molecules" + arxiv_id: "1610.02415" + abstract: "We report a method to convert discrete representations of molecules to and\nfrom a multidimensional continuous representation. This model allows us to\ngenerate new molecules for efficient exploration and optimization through\nopen-ended spaces of chemical compounds. 
A deep neural network was trained on\nhundreds of thousands of existing chemical structures to construct three\ncoupled functions: an encoder, a decoder and a predictor. The encoder converts\nthe discrete representation of a molecule into a real-valued continuous vector,\nand the decoder converts these continuous vectors back to discrete molecular\nrepresentations. The predictor estimates chemical properties from the latent\ncontinuous vector representation of the molecule. Continuous representations\nallow us to automatically generate novel chemical structures by performing\nsimple operations in the latent space, such as decoding random vectors,\nperturbing known chemical structures, or interpolating between molecules.\nContinuous representations also allow the use of powerful gradient-based\noptimization to efficiently guide the search for optimized functional\ncompounds. We demonstrate our method in the domain of drug-like molecules and\nalso in the set of molecules with fewer that nine heavy atoms." + pub_date: { + seconds: 1475798400 + } + authors: "Rafael Gómez-Bombarelli" + authors: "Jennifer N. Wei" + authors: "David Duvenaud" + authors: "José Miguel Hernández-Lobato" + authors: "Benjamín Sánchez-Lengeling" + authors: "Dennis Sheberla" + authors: "Jorge Aguilera-Iparraguirre" + authors: "Timothy D. Hirzel" + authors: "Ryan P. Adams" + authors: "Alán Aspuru-Guzik" + repositories: { + url: "https://github.com/Ishan-Kumar2/Molecular_VAE_Pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "PyTorch implementation of the paper \"Automatic Chemical Design Using a Data-Driven Continuous Representation of Molecules\"" + } + repositories: { + url: "https://github.com/TrentBrick/PAE" + framework: FRAMEWORK_PYTORCH + description: "Primary and Tertiary Sequence AutoEncoder" + } + repositories: { + url: "https://github.com/leungjch/drug_VAE" + framework: FRAMEWORK_OTHERS + description: "VAE trained on MOSES SMILES to produce novel molecules with druglike properties." + } + repositories: { + url: "https://github.com/shamelmerchant/keras-molecules" + framework: FRAMEWORK_TENSORFLOW + description: "Auto-encoder network for learning a continuous representation of chemical structures" + } + repositories: { + url: "https://github.com/brettin/keras-molecule" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + is_official: true + url: "https://github.com/aspuru-guzik-group/chemical_vae" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 303 + description: "Code for 10.1021/acscentsci.7b00572, now running on Keras 2.0 and Tensorflow" + } + repositories: { + is_official: true + url: "https://github.com/HIPS/molecule-autoencoder" + framework: FRAMEWORK_OTHERS + number_of_stars: 136 + description: "A project to enable optimization of molecules by transforming them to and from a continuous representation." 
+ } + repositories: { + url: "https://github.com/aksub99/molecular-vae" + framework: FRAMEWORK_PYTORCH + number_of_stars: 20 + description: "Pytorch implementation of the paper \"Automatic Chemical Design Using a Data-Driven Continuous Representation of Molecules\"" + } + repositories: { + url: "https://github.com/tevang/keras-molecules" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } + } + video: { + video_id: "hk4e8ZCkNWg" + video_title: "PR-173 : Automatic Chemical Design Using a Data-Driven Continuous Representation of Molecules" + number_of_likes: 10 + number_of_views: 390 + published_date: { + seconds: 1561309066 + } + uploader: "Sunghoon Joo" + } + } +} +pr_id_to_video: { + key: 174 + value: { + pr_id: 174 + video: { + video_id: "yqFDyX4ErSI" + video_title: "PR-174: Restricted Boltzmann Machine and Deep Belief Networks" + number_of_likes: 12 + number_of_views: 978 + published_date: { + seconds: 1561903626 + } + uploader: "MinGuk Kang" + } + } +} +pr_id_to_video: { + key: 175 + value: { + pr_id: 175 + papers: { + paper_id: "xlnet-generalized-autoregressive-pretraining" + title: "XLNet: Generalized Autoregressive Pretraining for Language Understanding" + arxiv_id: "1906.08237" + abstract: "With the capability of modeling bidirectional contexts, denoising autoencoding based pretraining like BERT achieves better performance than pretraining approaches based on autoregressive language modeling. However, relying on corrupting the input with masks, BERT neglects dependency between the masked positions and suffers from a pretrain-finetune discrepancy. In light of these pros and cons, we propose XLNet, a generalized autoregressive pretraining method that (1) enables learning bidirectional contexts by maximizing the expected likelihood over all permutations of the factorization order and (2) overcomes the limitations of BERT thanks to its autoregressive formulation. Furthermore, XLNet integrates ideas from Transformer-XL, the state-of-the-art autoregressive model, into pretraining. Empirically, under comparable experiment settings, XLNet outperforms BERT on 20 tasks, often by a large margin, including question answering, natural language inference, sentiment analysis, and document ranking." + pub_date: { + seconds: 1560902400 + } + authors: "Zhilin Yang" + authors: "Zihang Dai" + authors: "Yiming Yang" + authors: "Jaime Carbonell" + authors: "Ruslan Salakhutdinov" + authors: "Quoc V. Le" + repositories: { + url: "https://github.com/huggingface/transformers" + framework: FRAMEWORK_PYTORCH + number_of_stars: 47629 + description: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." + } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/language_model/xlnet" + framework: FRAMEWORK_OTHERS + number_of_stars: 1379 + description: "An NLP library with Awesome pre-trained Transformer models and easy-to-use interface, supporting wide-range of NLP tasks from research to industrial applications." 
+ } + repositories: { + url: "https://github.com/https-seyhan/BugAI" + framework: FRAMEWORK_OTHERS + number_of_stars: 4 + description: "Deep Learning Models (Long Short Term Memory (LSTM), Recurrent Neural Networks (RNN), Convolutional Neural Networks (CNN) for AI based Bug prediction" + } + repositories: { + url: "https://github.com/utterworks/fast-bert" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1593 + description: "Super easy library for BERT based NLP models" + } + repositories: { + url: "https://github.com/zaradana/Fast_BERT" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/studio-ousia/luke" + framework: FRAMEWORK_PYTORCH + number_of_stars: 313 + description: "LUKE -- Language Understanding with Knowledge-based Embeddings" + } + repositories: { + url: "https://github.com/huggingface/xlnet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 25 + description: "XLNet: Generalized Autoregressive Pretraining for Language Understanding" + } + repositories: { + url: "https://github.com/cuhksz-nlp/SAPar" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + } + repositories: { + url: "https://github.com/graykode/xlnet-Pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 513 + description: "Simple XLNet implementation with Pytorch Wrapper" + } + repositories: { + url: "https://github.com/tomgoter/nlp_finalproject" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Repository for Final Project for W266: Natural Language Processing with Deep Learning" + } + methods: { + name: "Variational Dropout" + full_name: "Variational Dropout" + description: "**Variational Dropout** is a regularization technique based on [dropout](https://paperswithcode.com/method/dropout), but uses a variational inference grounded approach. In Variational Dropout, we repeat the same dropout mask at each time step for both inputs, outputs, and recurrent layers (drop the same network units at each time step). This is in contrast to ordinary Dropout where different dropout masks are sampled at each time step for the inputs and outputs alone." + } + methods: { + name: "GELU" + full_name: "Gaussian Error Linear Units" + description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." 
+ } + methods: { + name: "Adaptive Input Representations" + full_name: "Adaptive Input Representations" + description: "**Adaptive Input Embeddings** extend the adaptive softmax to input word representations. The factorization assigns more capacity to frequent words and reduces the capacity for less frequent words with the benefit of reducing overfitting to rare words." + } + methods: { + name: "XLNet" + full_name: "XLNet" + description: "**XLNet** is an autoregressive Transformer that leverages the best of both autoregressive language modeling and autoencoding while attempting to avoid their limitations. Instead of using a fixed forward or backward factorization order as in conventional autoregressive models, XLNet maximizes the expected log likelihood of a sequence w.r.t. all possible permutations of the factorization order. Thanks to the permutation operation, the context for each position can consist of tokens from both left and right. In expectation, each position learns to utilize contextual information from all positions, i.e., capturing bidirectional context.\r\n\r\nAdditionally, inspired by the latest advancements in autogressive language modeling, XLNet integrates the segment recurrence mechanism and relative encoding scheme of [Transformer-XL](https://paperswithcode.com/method/transformer-xl) into pretraining, which empirically improves the performance especially for tasks involving a longer text sequence." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "SentencePiece" + full_name: "SentencePiece" + description: "**SentencePiece** is a subword tokenizer and detokenizer for natural language processing. It performs subword segmentation, supporting the byte-pair-encoding (BPE) algorithm and unigram language model, and then converts this text into an id sequence guarantee perfect reproducibility of the normalization and subword segmentation." + } + methods: { + name: "Cosine Annealing" + full_name: "Cosine Annealing" + description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. 
The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\\eta\\_{t} = \\eta\\_{min}^{i} + \\frac{1}{2}\\left(\\eta\\_{max}^{i}-\\eta\\_{min}^{i}\\right)\\left(1+\\cos\\left(\\frac{T\\_{cur}}{T\\_{i}}\\pi\\right)\\right)\r\n$$\r\n\r\nWhere $\\eta\\_{min}^{i}$ and $\\eta\\_{max}^{i}$ are ranges for the learning rate, and $T\\_{cur}$ accounts for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" + } + methods: { + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allow for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Adaptive Softmax" + full_name: "Adaptive Softmax" + description: "**Adaptive Softmax** is a speedup technique for the computation of probability distributions over words. The adaptive softmax is inspired by the class-based hierarchical softmax, where the word classes are built to minimize the computation time.
Adaptive softmax achieves efficiency by explicitly taking into account the computation time of matrix-multiplication on parallel systems and combining it with a few important observations, namely keeping a shortlist of frequent words in the root node\r\nand reducing the capacity of rare words." + } + } + video: { + video_id: "koj9BKiu1rU" + video_title: "PR-175: XLNet: Generalized Autoregressive Pretraining for Language Understanding" + number_of_likes: 55 + number_of_views: 2415 + published_date: { + seconds: 1561964703 + } + uploader: "박성남" + } + } +} +pr_id_to_video: { + key: 176 + value: { + pr_id: 176 + papers: { + paper_id: "combating-label-noise-in-deep-learning-using" + title: "Combating Label Noise in Deep Learning Using Abstention" + arxiv_id: "1905.10964" + abstract: "We introduce a novel method to combat label noise when training deep neural networks for classification. We propose a loss function that permits abstention during training thereby allowing the DNN to abstain on confusing samples while continuing to learn and improve classification performance on the non-abstained samples. We show how such a deep abstaining classifier (DAC) can be used for robust learning in the presence of different types of label noise. In the case of structured or systematic label noise -- where noisy training labels or confusing examples are correlated with underlying features of the data-- training with abstention enables representation learning for features that are associated with unreliable labels. In the case of unstructured (arbitrary) label noise, abstention during training enables the DAC to be used as an effective data cleaner by identifying samples that are likely to have label noise. We provide analytical results on the loss function behavior that enable dynamic adaption of abstention rates based on learning progress during training. We demonstrate the utility of the deep abstaining classifier for various image classification tasks under different types of label noise; in the case of arbitrary label noise, we show significant improvements over previously published results on multiple image benchmarks. Source code is available at https://github.com/thulas/dac-label-noise" + pub_date: { + seconds: 1558915200 + } + authors: "Sunil Thulasidasan" + authors: "Tanmoy Bhattacharya" + authors: "Jeff Bilmes" + authors: "Gopinath Chennupati" + authors: "Jamal Mohd-Yusof" + repositories: { + url: "https://github.com/eabarnes1010/controlled_abstention_networks" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Neural network loss functions for regression and classification tasks that can say \"I don't know\"." + } + repositories: { + is_official: true + url: "https://github.com/thulas/dac-label-noise" + framework: FRAMEWORK_PYTORCH + number_of_stars: 45 + description: "Label de-noising for deep learning" + } + } + video: { + video_id: "ihbEF6WGlrA" + video_title: "PR-176: Combating Label Noise in Deep Learning using Abstention" + number_of_likes: 18 + number_of_views: 902 + published_date: { + seconds: 1562509657 + } + uploader: "Doyup Lee" + } + } +} +pr_id_to_video: { + key: 177 + value: { + pr_id: 177 + papers: { + paper_id: "framing-u-net-via-deep-convolutional" + title: "Framing U-Net via Deep Convolutional Framelets: Application to Sparse-view CT" + arxiv_id: "1708.08333" + abstract: "X-ray computed tomography (CT) using sparse projection views is a recent\napproach to reduce the radiation dose. 
However, due to the insufficient\nprojection views, an analytic reconstruction approach using the filtered back\nprojection (FBP) produces severe streaking artifacts. Recently, deep learning\napproaches using large receptive field neural networks such as U-Net have\ndemonstrated impressive performance for sparse-view CT reconstruction.\nHowever, theoretical justification is still lacking. Inspired by the recent\ntheory of deep convolutional framelets, the main goal of this paper is,\ntherefore, to reveal the limitation of U-Net and propose new multi-resolution\ndeep learning schemes. In particular, we show that the alternative U-Net\nvariants such as dual frame and the tight frame U-Nets satisfy the so-called\nframe condition which make them better for effective recovery of high frequency\nedges in sparse-view CT. Using extensive experiments with real patient data\nset, we demonstrate that the new network architectures provide better\nreconstruction performance." + pub_date: { + seconds: 1503878400 + } + authors: "Yoseob Han" + authors: "Jong Chul Ye" + repositories: { + url: "https://github.com/hjahan58/framing-u-net" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/jongcye/FramingUNet" + framework: FRAMEWORK_OTHERS + description: "improving U-Net using Frame Theory: Dual-Frame and Tight-Frame U-Nets" + } + repositories: { + is_official: true + url: "https://github.com/hanyoseob/framing-u-net" + framework: FRAMEWORK_OTHERS + number_of_stars: 15 + description: "Deep Convolutional Framelets: A General Deep Learning Framework for Inverse Problems" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity.
Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "U-Net" + full_name: "U-Net" + description: "**U-Net** is an architecture for semantic segmentation. It consists of a contracting path and an expansive path. The contracting path follows the typical architecture of a convolutional network. It consists of the repeated application of two 3x3 convolutions (unpadded convolutions), each followed by a rectified linear unit (ReLU) and a 2x2 max pooling operation with stride 2 for downsampling. At each downsampling step we double the number of feature channels. Every step in the expansive path consists of an upsampling of the feature map followed by a 2x2 convolution (“up-convolution”) that halves the number of feature channels, a concatenation with the correspondingly cropped feature map from the contracting path, and two 3x3 convolutions, each followed by a ReLU. The cropping is necessary due to the loss of border pixels in every convolution. At the final layer a 1x1 convolution is used to map each 64-component feature vector to the desired number of classes. In total the network has 23 convolutional layers." + } + methods: { + name: "Concatenated Skip Connection" + full_name: "Concatenated Skip Connection" + description: "A **Concatenated Skip Connection** is a type of skip connection that seeks to reuse features by concatenating them to new layers, allowing more information to be retained from previous layers of the network. This contrasts with say, residual connections, where element-wise summation is used instead to incorporate information from previous layers. This type of skip connection is prominently used in DenseNets (and also Inception networks), which the Figure to the right illustrates." + } + } + video: { + video_id: "KSJcQlEKI0Q" + video_title: "PR-177: Framing U-Net via Deep Convolutional Framelets" + number_of_views: 584 + published_date: { + seconds: 1562511247 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 178 + value: { + pr_id: 178 + papers: { + paper_id: "semi-supervised-classification-with-graph" + title: "Semi-Supervised Classification with Graph Convolutional Networks" + arxiv_id: "1609.02907" + abstract: "We present a scalable approach for semi-supervised learning on\ngraph-structured data that is based on an efficient variant of convolutional\nneural networks which operate directly on graphs. We motivate the choice of our\nconvolutional architecture via a localized first-order approximation of\nspectral graph convolutions. Our model scales linearly in the number of graph\nedges and learns hidden layer representations that encode both local graph\nstructure and features of nodes. In a number of experiments on citation\nnetworks and on a knowledge graph dataset we demonstrate that our approach\noutperforms related methods by a significant margin." + pub_date: { + seconds: 1473379200 + } + authors: "Thomas N. 
Kipf" + authors: "Max Welling" + repositories: { + url: "https://github.com/andrejmiscic/gcn-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Implementation of the Graph Convolutional Networks in Pytorch" + } + repositories: { + url: "https://github.com/switiz/gnn-gcn-gat" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "example of gnns" + } + repositories: { + url: "https://github.com/hazdzz/GCN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "The PyTorch version of GCN implemented by the paper." + } + repositories: { + url: "https://github.com/LouisDumont/GCN---re-implementation" + framework: FRAMEWORK_PYTORCH + description: "A re-implementation of the Graph Neural Networks described in https://arxiv.org/abs/1609.02907" + } + repositories: { + url: "https://github.com/thanhtrunghuynh93/pygcn" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/lipingcoding/pygcn" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/ChengSashankh/gcn-graph-classification" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/LeeWooJung/GCN_reproduce" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "Reproduce GCN in pytorch" + } + repositories: { + url: "https://github.com/dtriepke/Graph_Convolutional_Network" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/Anieca/GCN" + framework: FRAMEWORK_PYTORCH + } + methods: { + name: "Graph Convolutional Networks" + full_name: "Graph Convolutional Networks" + description: "A Graph Convolutional Network, or GCN, is an approach for semi-supervised learning on graph-structured data. It is based on an efficient variant of convolutional neural networks which operate directly on graphs.\r\n\r\nImage source: [Semi-Supervised Classification with Graph Convolutional Networks](https://arxiv.org/pdf/1609.02907v4.pdf)" + } + methods: { + name: "GCN" + full_name: "Graph Convolutional Network" + description: "A **Graph Convolutional Network**, or **GCN**, is an approach for semi-supervised learning on graph-structured data. It is based on an efficient variant of [convolutional neural networks](https://paperswithcode.com/methods/category/convolutional-neural-networks) which operate directly on graphs. The choice of convolutional architecture is motivated via a localized first-order approximation of spectral graph convolutions. The model scales linearly in the number of graph edges and learns hidden layer representations that encode both local graph structure and features of nodes." + } + } + video: { + video_id: "uqBsvoOY8jM" + video_title: "PR-178: Graph Convolutional Network" + number_of_likes: 95 + number_of_views: 6071 + published_date: { + seconds: 1563112484 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 179 + value: { + pr_id: 179 + papers: { + paper_id: "m3d-gan-multi-modal-multi-domain-translation" + title: "M3D-GAN: Multi-Modal Multi-Domain Translation with Universal Attention" + arxiv_id: "1907.04378" + abstract: "Generative adversarial networks have led to significant advances in cross-modal/domain translation. However, typically these networks are designed for a specific task (e.g., dialogue generation or image synthesis, but not both). 
We present a unified model, M3D-GAN, that can translate across a wide range of modalities (e.g., text, image, and speech) and domains (e.g., attributes in images or emotions in speech). Our model consists of modality subnets that convert data from different modalities into unified representations, and a unified computing body where data from different modalities share the same network architecture. We introduce a universal attention module that is jointly trained with the whole network and learns to encode a large range of domain information into a highly structured latent space. We use this to control synthesis in novel ways, such as producing diverse realistic pictures from a sketch or varying the emotion of synthesized speech. We evaluate our approach on extensive benchmark tasks, including image-to-image, text-to-image, image captioning, text-to-speech, speech recognition, and machine translation. Our results show state-of-the-art performance on some of the tasks." + pub_date: { + seconds: 1562630400 + } + authors: "Shuang Ma" + authors: "Daniel McDuff" + authors: "Yale Song" + } + video: { + video_id: "CpRGaFPIZnw" + video_title: "PR-179: M3D-GAN: Multi-Modal Multi-Domain Translation with Universal Attention" + number_of_likes: 8 + number_of_views: 558 + published_date: { + seconds: 1563115146 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 180 + value: { + pr_id: 180 + papers: { + paper_id: "the-lottery-ticket-hypothesis-finding-sparse" + title: "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks" + arxiv_id: "1803.03635" + abstract: "Neural network pruning techniques can reduce the parameter counts of trained\nnetworks by over 90%, decreasing storage requirements and improving\ncomputational performance of inference without compromising accuracy. However,\ncontemporary experience is that the sparse architectures produced by pruning\nare difficult to train from the start, which would similarly improve training\nperformance.\n We find that a standard pruning technique naturally uncovers subnetworks\nwhose initializations made them capable of training effectively. Based on these\nresults, we articulate the \"lottery ticket hypothesis:\" dense,\nrandomly-initialized, feed-forward networks contain subnetworks (\"winning\ntickets\") that - when trained in isolation - reach test accuracy comparable to\nthe original network in a similar number of iterations. The winning tickets we\nfind have won the initialization lottery: their connections have initial\nweights that make training particularly effective.\n We present an algorithm to identify winning tickets and a series of\nexperiments that support the lottery ticket hypothesis and the importance of\nthese fortuitous initializations. We consistently find winning tickets that are\nless than 10-20% of the size of several fully-connected and convolutional\nfeed-forward architectures for MNIST and CIFAR10. Above this size, the winning\ntickets that we find learn faster than the original network and reach higher\ntest accuracy." 
+ pub_date: { + seconds: 1520553600 + } + authors: "Jonathan Frankle" + authors: "Michael Carbin" + repositories: { + url: "https://github.com/phiandark/SiftingFeatures" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Code for the paper \"Sifting out the features by pruning: Are convolutional networks the winning lottery ticket of fully connected ones?\"" + } + repositories: { + url: "https://github.com/hdo0947/Lottery-Ticket-Hypothesis" + framework: FRAMEWORK_PYTORCH + description: "Project with Jack Weitze" + } + repositories: { + url: "https://github.com/JingtongSu/sanity-checking-pruning" + framework: FRAMEWORK_PYTORCH + number_of_stars: 21 + description: "Code for Sanity-Checking Pruning Methods: Random Tickets can Win the Jackpot" + } + repositories: { + url: "https://github.com/ARMargolis/melanoma-pytorch" + framework: FRAMEWORK_PYTORCH + description: "Development of a PyTorch model for Kaggle melanoma competition" + } + repositories: { + url: "https://github.com/zhangtj1996/lottery-ticket-hypothesis-Mxnet" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "A reimplementation of \"The Lottery Ticket Hypothesis\" (Frankle and Carbin) by Mxnet for FC network." + } + repositories: { + url: "https://github.com/Taoudi/LotteryTicketHypothesis" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Kevin Ammouri and Youssef Taoudi" + } + repositories: { + url: "https://github.com/COMP6248-Reproducability-Challenge/REPRODUCIBILITY-REPORT-THE-LOTTERY-TICKET-HYPOTHESIS" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/Theys96/lottery-ticket-hypothesis" + framework: FRAMEWORK_TENSORFLOW + description: "Experimentation setup for the \"Lottery Ticket\" hypothesis for neural networks." + } + repositories: { + url: "https://github.com/Happy-Virus-IkBeom/LTH_Tensorflow" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/kenichdietrich/LotteryTicketHypothesis" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Codes to perform LTH with Keras and Tensorflow" + } + } + video: { + video_id: "dkNmYu610r8" + video_title: "PR-180: The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks" + number_of_likes: 11 + number_of_views: 759 + published_date: { + seconds: 1564043119 + } + uploader: "Taekmin Kim" + } + } +} +pr_id_to_video: { + key: 181 + value: { + pr_id: 181 + papers: { + paper_id: "data-shapley-equitable-valuation-of-data-for" + title: "Data Shapley: Equitable Valuation of Data for Machine Learning" + arxiv_id: "1904.02868" + abstract: "As data becomes the fuel driving technological and economic growth, a fundamental challenge is how to quantify the value of data in algorithmic predictions and decisions. For example, in healthcare and consumer markets, it has been suggested that individuals should be compensated for the data that they generate, but it is not clear what is an equitable valuation for individual data. In this work, we develop a principled framework to address data valuation in the context of supervised machine learning. Given a learning algorithm trained on $n$ data points to produce a predictor, we propose data Shapley as a metric to quantify the value of each training datum to the predictor performance. Data Shapley value uniquely satisfies several natural properties of equitable data valuation. 
We develop Monte Carlo and gradient-based methods to efficiently estimate data Shapley values in practical settings where complex learning algorithms, including neural networks, are trained on large datasets. In addition to being equitable, extensive experiments across biomedical, image and synthetic data demonstrate that data Shapley has several other benefits: 1) it is more powerful than the popular leave-one-out or leverage score in providing insight on what data is more valuable for a given learning task; 2) low Shapley value data effectively capture outliers and corruptions; 3) high Shapley value data inform what type of new data to acquire to improve the predictor." + pub_date: { + seconds: 1554422400 + } + authors: "Amirata Ghorbani" + authors: "James Zou" + repositories: { + url: "https://github.com/Weixin-Liang/HERALD" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "HERALD: An Annotation Efficient Method to Train User EngagementPredictors in Dialogs (ACL 2021)" + } + repositories: { + url: "https://github.com/Weixin-Liang/dialog_evaluation_CMADE" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "Beyond User Self-Reported Likert Scale Ratings: A Comparison Model for Automatic Dialog Evaluation (ACL 2020)" + } + repositories: { + is_official: true + url: "https://github.com/amiratag/DataShapley" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 132 + description: "Data Shapley: Equitable Valuation of Data for Machine Learning" + } + repositories: { + url: "https://github.com/GISH123/Cathay-Holdings-CIP-Projects-for-Interpretable-Machine-Learning" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "All my two month works for projects toward Interpretable Machine Learning for Cathay(國泰金控 數數發 資料科學研發科 Lab)" + } + } + video: { + video_id: "YdCXbBDuVuE" + video_title: "PR-181: Data Shapley: Equitable Valuation of Data for Machine Learning" + number_of_likes: 7 + number_of_views: 534 + published_date: { + seconds: 1563717023 + } + uploader: "Jinsung Yoon" + } + } +} +pr_id_to_video: { + key: 182 + value: { + pr_id: 182 + papers: { + paper_id: "ensemble-deep-learning-a-review" + title: "Ensemble deep learning: A review" + arxiv_id: "2104.02395" + abstract: "Ensemble learning combines several individual models to obtain better generalization performance. Currently, deep learning models with multilayer processing architecture is showing better performance as compared to the shallow or traditional classification models. Deep ensemble learning models combine the advantages of both the deep learning models as well as the ensemble learning such that the final model has better generalization performance. This paper reviews the state-of-art deep ensemble models and hence serves as an extensive summary for the researchers. The ensemble models are broadly categorised into ensemble models like bagging, boosting and stacking, negative correlation based deep ensemble models, explicit/implicit ensembles, homogeneous /heterogeneous ensemble, decision fusion strategies, unsupervised, semi-supervised, reinforcement learning and online/incremental, multilabel based deep ensemble models. Application of deep ensemble models in different domains is also briefly discussed. Finally, we conclude this paper with some future recommendations and research directions." + pub_date: { + seconds: 1617667200 + } + authors: "M. A. Ganaie" + authors: "Minghui Hu" + authors: "M. Tanveer*" + authors: "P. N. 
Suganthan*" + } + video: { + video_id: "twhZ3j_VCa0" + video_title: "PR-182: Deep Learning Ensemble Method" + number_of_likes: 15 + number_of_views: 775 + published_date: { + seconds: 1564898851 + } + uploader: "정지원" + } + } +} +pr_id_to_video: { + key: 183 + value: { + pr_id: 183 + papers: { + paper_id: "mixnet-mixed-depthwise-convolutional-kernels" + title: "MixConv: Mixed Depthwise Convolutional Kernels" + arxiv_id: "1907.09595" + abstract: "Depthwise convolution is becoming increasingly popular in modern efficient ConvNets, but its kernel size is often overlooked. In this paper, we systematically study the impact of different kernel sizes, and observe that combining the benefits of multiple kernel sizes can lead to better accuracy and efficiency. Based on this observation, we propose a new mixed depthwise convolution (MixConv), which naturally mixes up multiple kernel sizes in a single convolution. As a simple drop-in replacement of vanilla depthwise convolution, our MixConv improves the accuracy and efficiency for existing MobileNets on both ImageNet classification and COCO object detection. To demonstrate the effectiveness of MixConv, we integrate it into AutoML search space and develop a new family of models, named as MixNets, which outperform previous mobile models including MobileNetV2 [20] (ImageNet top-1 accuracy +4.2%), ShuffleNetV2 [16] (+3.5%), MnasNet [26] (+1.3%), ProxylessNAS [2] (+2.2%), and FBNet [27] (+2.0%). In particular, our MixNet-L achieves a new state-of-the-art 78.9% ImageNet top-1 accuracy under typical mobile settings (<600M FLOPS). Code is at https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet" + pub_date: { + seconds: 1563753600 + } + authors: "Mingxing Tan" + authors: "Quoc V. Le" + repositories: { + url: "https://github.com/rwightman/pytorch-image-models" + framework: FRAMEWORK_PYTORCH + number_of_stars: 11097 + description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + } + repositories: { + url: "https://github.com/osmr/imgclsmob" + framework: FRAMEWORK_OTHERS + number_of_stars: 2202 + description: "Sandbox for training deep learning networks" + } + repositories: { + url: "https://github.com/rwightman/efficientnet-jax" + framework: FRAMEWORK_OTHERS + number_of_stars: 65 + description: "EfficientNet, MobileNetV3, MobileNetV2, MixNet, etc in JAX w/ Flax Linen and Objax" + } + repositories: { + url: "https://github.com/chrisway613/MixConv" + framework: FRAMEWORK_PYTORCH + description: "Mixed Depth-Wise Convolution" + } + repositories: { + url: "https://github.com/neeraj-j/MixNet" + framework: FRAMEWORK_PYTORCH + description: "Pytorch implementation of MixNet" + } + repositories: { + is_official: true + url: "https://github.com/tensorflow/tpu" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4338 + description: "Reference models and tools for Cloud TPUs." + } + repositories: { + url: "https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4338 + description: "Reference models and tools for Cloud TPUs."
+ } + repositories: { + url: "https://github.com/zsef123/MixNet-PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7 + description: "A PyTorch implementation of MixNet: Mixed Depthwise Convolutional Kernels" + } + repositories: { + url: "https://github.com/JinLi711/Convolution_Variants" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "Reimplementing SOTA convolution variants with Tensorflow 2.0." + } + repositories: { + url: "https://github.com/JinLi711/Attention-Augmented-Convolution" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "Reimplementing SOTA convolution variants with Tensorflow 2.0." + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "MobileNetV1" + full_name: "MobileNetV1" + description: "**MobileNet** is a type of convolutional neural network designed for mobile and embedded vision applications. They are based on a streamlined architecture that uses depthwise separable convolutions to build lightweight deep neural networks that can have low latency for mobile and embedded devices." 
+ } + methods: { + name: "Depthwise Separable Convolution" + full_name: "Depthwise Separable Convolution" + description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" + } + methods: { + name: "MixConv" + full_name: "Mixed Depthwise Convolution" + description: "**MixConv**, or **Mixed Depthwise Convolution**, is a type of depthwise convolution that naturally mixes up multiple kernel sizes in a single convolution. It is based on the insight that depthwise convolution applies a single kernel size to all channels, which MixConv overcomes by combining the benefits of multiple kernel sizes. It does this by partitioning channels into groups and applying a different kernel size to each group." + } + methods: { + name: "Grouped Convolution" + full_name: "Grouped Convolution" + description: "A **Grouped Convolution** uses a group of convolutions - multiple kernels per layer - resulting in multiple channel outputs per layer. This leads to wider networks helping a network learn a varied set of low level and high level features. The original motivation of using Grouped Convolutions in [AlexNet](https://paperswithcode.com/method/alexnet) was to distribute the model over multiple GPUs as an engineering compromise. But later, with models such as [ResNeXt](https://paperswithcode.com/method/alexnet), it was shown this module could be used to improve classification accuracy. Specifically by exposing a new dimension through grouped convolutions, *cardinality* (the size of set of transformations), we can increase accuracy by increasing it." + } + methods: { + name: "MixNet" + full_name: "MixNet" + description: "**MixNet** is a type of convolutional neural network discovered via AutoML that utilises MixConvs instead of regular depthwise convolutions." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." 
+ } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + } + video: { + video_id: "252YxqpHzsg" + video_title: "PR-183: MixNet: Mixed Depthwise Convolutional Kernels" + number_of_likes: 28 + number_of_views: 1681 + published_date: { + seconds: 1564326548 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 184 + value: { + pr_id: 184 + papers: { + paper_id: "and-the-bit-goes-down-revisiting-the" + title: "And the Bit Goes Down: Revisiting the Quantization of Neural Networks" + arxiv_id: "1907.05686" + abstract: "In this paper, we address the problem of reducing the memory footprint of convolutional network architectures. We introduce a vector quantization method that aims at preserving the quality of the reconstruction of the network outputs rather than its weights. The principle of our approach is that it minimizes the loss reconstruction error for in-domain inputs. Our method only requires a set of unlabelled data at quantization time and allows for efficient inference on CPU by using byte-aligned codebooks to store the compressed weights. We validate our approach by quantizing a high performing ResNet-50 model to a memory size of 5MB (20x compression factor) while preserving a top-1 accuracy of 76.1% on ImageNet object classification and by compressing a Mask R-CNN with a 26x factor." + pub_date: { + seconds: 1562889600 + } + authors: "Pierre Stock" + authors: "Armand Joulin" + authors: "Rémi Gribonval" + authors: "Benjamin Graham" + authors: "Hervé Jégou" + repositories: { + url: "https://github.com/huggingface/block_movement_pruning" + framework: FRAMEWORK_PYTORCH + number_of_stars: 23 + description: "Block Sparse movement pruning" + } + repositories: { + url: "https://github.com/uber-research/permute-quantize-finetune" + framework: FRAMEWORK_PYTORCH + number_of_stars: 89 + description: "Using ideas from product quantization for state-of-the-art neural network compression." + } + repositories: { + is_official: true + url: "https://github.com/facebookresearch/kill-the-bits" + framework: FRAMEWORK_PYTORCH + number_of_stars: 614 + description: "Code for: \"And the bit goes down: Revisiting the quantization of neural networks\"" + } + methods: { + name: "Mask R-CNN" + full_name: "Mask R-CNN" + description: "**Mask R-CNN** extends [Faster R-CNN](http://paperswithcode.com/method/faster-r-cnn) to solve instance segmentation tasks. It achieves this by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. In principle, Mask R-CNN is an intuitive extension of Faster R-CNN, but constructing the mask branch properly is critical for good results. \r\n\r\nMost importantly, Faster R-CNN was not designed for pixel-to-pixel alignment between network inputs and outputs. This is evident in how [RoIPool](http://paperswithcode.com/method/roi-pooling), the *de facto* core operation for attending to instances, performs coarse spatial quantization for feature extraction. 
To fix the misalignment, Mask R-CNN utilises a simple, quantization-free layer, called [RoIAlign](http://paperswithcode.com/method/roi-align), that faithfully preserves exact spatial locations. \r\n\r\nSecondly, Mask R-CNN *decouples* mask and class prediction: it predicts a binary mask for each class independently, without competition among classes, and relies on the network's RoI classification branch to predict the category. In contrast, an [FCN](http://paperswithcode.com/method/fcn) usually perform per-pixel multi-class categorization, which couples segmentation and classification." + } + methods: { + name: "RoIAlign" + full_name: "RoIAlign" + description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + } + video: { + video_id: "FPLvzxH8geY" + video_title: "PR-184: And the Bit Goes Down: Revisiting the Quantization of Neural Networks" + number_of_likes: 7 + number_of_views: 530 + published_date: { + seconds: 1564926928 + } + uploader: "Young Seok Kim" + } + } +} +pr_id_to_video: { + key: 185 + value: { + pr_id: 185 + papers: { + paper_id: "190500641" + title: "RetinaFace: Single-stage Dense Face Localisation in the Wild" + arxiv_id: "1905.00641" + abstract: "Face Analysis Project on MXNet" + pub_date: { + seconds: 1556755200 + } + authors: "Jiankang Deng" + authors: "Jia Guo" + authors: "Yuxiang Zhou" + authors: "Jinke Yu" + authors: "Irene Kotsia" + authors: "Stefanos Zafeiriou" + repositories: { + url: "https://github.com/jason90330/EdgeFinal" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/vladimirwest/insightface_cinematic" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/iChenning/facedetection" + framework: FRAMEWORK_PYTORCH + description: "face detection,based on retinaface" + } + repositories: { + url: "https://github.com/serengil/retinaface" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 64 + description: "RetinaFace: Deep Face Detection Library in TensorFlow for Python" + } + repositories: { + url: "https://github.com/serengil/deepface" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1696 + description: "A Lightweight Deep Face Recognition and Facial Attribute Analysis (Age, Gender, Emotion and Race) Framework for Python" + } + repositories: { + url: "https://github.com/nickuntitled/censorface-js" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Face Detection in Javascript by ONNX.js" + } + repositories: { + url: "https://github.com/prajinkhadka/face_det_check" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/Johnny952/retinaface_mod" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/SohamSarfare/ADS" + framework: FRAMEWORK_OTHERS + description: "Re-evaluating the results of the paper RetinaFace algorithm using original data along with the original WIDERFACE dataset. " + } + repositories: { + url: "https://github.com/bubbliiiing/retinaface-keras" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 40 + description: "这是一个retinaface-keras的源码,可以用于训练自己的模型。" + } + } + video: { + video_id: "DkcHEnxkXpM" + video_title: "PR-185: RetinaFace: Single-stage Dense Face Localisation in the Wild" + number_of_likes: 25 + number_of_views: 2397 + published_date: { + seconds: 1570081394 + } + uploader: "visionNoob" + } + } +} +pr_id_to_video: { + key: 186 + value: { + pr_id: 186 + papers: { + paper_id: "arbitrary-style-transfer-in-real-time-with" + title: "Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" + arxiv_id: "1703.06868" + abstract: "Gatys et al. recently introduced a neural algorithm that renders a content\nimage in the style of another image, achieving so-called style transfer.\nHowever, their framework requires a slow iterative optimization process, which\nlimits its practical application. Fast approximations with feed-forward neural\nnetworks have been proposed to speed up neural style transfer. 
Unfortunately,\nthe speed improvement comes at a cost: the network is usually tied to a fixed\nset of styles and cannot adapt to arbitrary new styles. In this paper, we\npresent a simple yet effective approach that for the first time enables\narbitrary style transfer in real-time. At the heart of our method is a novel\nadaptive instance normalization (AdaIN) layer that aligns the mean and variance\nof the content features with those of the style features. Our method achieves\nspeed comparable to the fastest existing approach, without the restriction to a\npre-defined set of styles. In addition, our approach allows flexible user\ncontrols such as content-style trade-off, style interpolation, color & spatial\ncontrols, all using a single feed-forward neural network." + pub_date: { + seconds: 1489968000 + } + authors: "Xun Huang" + authors: "Serge Belongie" + repositories: { + url: "https://github.com/KaiyangZhou/ssdg-benchmark" + framework: FRAMEWORK_PYTORCH + number_of_stars: 24 + description: "Benchmarks for semi-supervised domain generalization." + } + repositories: { + url: "https://github.com/KaiyangZhou/mixstyle-release" + framework: FRAMEWORK_PYTORCH + number_of_stars: 87 + description: "Domain Generalization with MixStyle. ICLR'21." + } + repositories: { + url: "https://github.com/PacktPublishing/Hands-On-Image-Generation-with-TensorFlow-2.0/tree/master/Chapter05" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 62 + description: "Hands-On Image Generation with TensorFlow 2.0, published by Packt" + } + repositories: { + url: "https://github.com/srihari-humbarwadi/adain-tensorflow2.x" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "TensorFlow2.x implementation of Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" + } + repositories: { + url: "https://github.com/krisrjohnson/Realistic-Neural-Talking-Head-Models" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/JeongsolKim/BiS400_term_project" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/Jwrede/neural_style_transfer" + framework: FRAMEWORK_PYTORCH + description: "Pytorch implementation of the paper Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" + } + repositories: { + url: "https://github.com/times2049/talkinghead" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/ptran1203/style_transfer" + framework: FRAMEWORK_TENSORFLOW + description: "Arbitrary Style Transfer With Adaptive Instance Normalization" + } + repositories: { + url: "https://github.com/Yijunmaverick/UniversalStyleTransfer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 521 + description: "The source code of NIPS17 'Universal Style Transfer via Feature Transforms'." + } + methods: { + name: "Style Transfer Module" + full_name: "Style Transfer Module" + description: "Modules used in GAN's style transfer." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Adaptive Instance Normalization" + full_name: "Adaptive Instance Normalization" + description: "**Adaptive Instance Normalization** is a normalization method that aligns the mean and variance of the content features with those of the style features. \r\n\r\n[Instance Normalization](https://paperswithcode.com/method/instance-normalization) normalizes the input to a single style specified by the affine parameters. Adaptive Instance Normaliation is an extension. In AdaIN, we receive a content input $x$ and a style input $y$, and we simply align the channel-wise mean and variance of $x$ to match those of $y$. Unlike [Batch Normalization](https://paperswithcode.com/method/batch-normalization), Instance Normalization or Conditional Instance Normalization, AdaIN has no learnable affine parameters. Instead, it adaptively computes the affine parameters from the style input:\r\n\r\n$$\r\n\\textrm{AdaIN}(x, y)= \\sigma(y)\\left(\\frac{x-\\mu(x)}{\\sigma(x)}\\right)+\\mu(y)\r\n$$" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "VGG" + full_name: "VGG" + description: "**VGG** is a classical convolutional neural network architecture. It was based on an analysis of how to increase the depth of such networks. The network utilises small 3 x 3 filters. Otherwise the network is characterized by its simplicity: the only other components being pooling layers and a fully connected layer.\r\n\r\nImage: [Davi Frossard](https://www.cs.toronto.edu/frossard/post/vgg16/)" + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + } + video: { + video_id: "16BGnsIyh6M" + video_title: "PR-186: Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" + number_of_likes: 23 + number_of_views: 821 + published_date: { + seconds: 1565608448 + } + uploader: "Soyeon Kim" + } + } +} +pr_id_to_video: { + key: 187 + value: { + pr_id: 187 + papers: { + paper_id: "morphnet-fast-simple-resource-constrained" + title: "MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks" + arxiv_id: "1711.06798" + abstract: "We present MorphNet, an approach to automate the design of neural network\nstructures. MorphNet iteratively shrinks and expands a network, shrinking via a\nresource-weighted sparsifying regularizer on activations and expanding via a\nuniform multiplicative factor on all layers. 
In contrast to previous\napproaches, our method is scalable to large networks, adaptable to specific\nresource constraints (e.g. the number of floating-point operations per\ninference), and capable of increasing the network's performance. When applied\nto standard network architectures on a wide variety of datasets, our approach\ndiscovers novel structures in each domain, obtaining higher performance while\nrespecting the resource constraint." + pub_date: { + seconds: 1510963200 + } + authors: "Ariel Gordon" + authors: "Elad Eban" + authors: "Ofir Nachum" + authors: "Bo Chen" + authors: "Hao Wu" + authors: "Tien-Ju Yang" + authors: "Edward Choi" + repositories: { + url: "https://github.com/google-research/morph-net" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 961 + description: "Fast & Simple Resource-Constrained Learning of Deep Network Structure" + } + repositories: { + url: "https://github.com/tensorflow/models" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70339 + description: "Models and examples built with TensorFlow" + } + repositories: { + url: "https://github.com/NatGr/Master_Thesis" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 10 + description: "Repo for my Master Thesis at ULiège in 2019 (Machine learning under resource constraints)" + } + } + video: { + video_id: "vUNAJsO2G98" + video_title: "PR-187 : MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks" + number_of_likes: 4 + number_of_views: 507 + published_date: { + seconds: 1565712056 + } + uploader: "Sunghoon Joo" + } + } +} +pr_id_to_video: { + key: 188 + value: { + pr_id: 188 + papers: { + paper_id: "online-meta-learning" + title: "Online Meta-Learning" + arxiv_id: "1902.08438" + abstract: "A central capability of intelligent systems is the ability to continuously build upon previous experiences to speed up and enhance learning of new tasks. Two distinct research paradigms have studied this question. Meta-learning views this problem as learning a prior over model parameters that is amenable for fast adaptation on a new task, but typically assumes the set of tasks are available together as a batch. In contrast, online (regret based) learning considers a sequential setting in which problems are revealed one after the other, but conventionally train only a single model without any task-specific adaptation. This work introduces an online meta-learning setting, which merges ideas from both the aforementioned paradigms to better capture the spirit and practice of continual lifelong learning. We propose the follow the meta leader algorithm which extends the MAML algorithm to this setting. Theoretically, this work provides an $\\mathcal{O}(\\log T)$ regret guarantee with only one additional higher order smoothness assumption in comparison to the standard online setting. Our experimental evaluation on three different large-scale tasks suggest that the proposed algorithm significantly outperforms alternatives based on traditional online learning approaches." 
+ pub_date: { + seconds: 1550793600 + } + authors: "Chelsea Finn" + authors: "Aravind Rajeswaran" + authors: "Sham Kakade" + authors: "Sergey Levine" + methods: { + name: "MAML" + full_name: "Model-Agnostic Meta-Learning" + description: "**MAML**, or **Model-Agnostic Meta-Learning**, is a model and task-agnostic algorithm for meta-learning that trains a model’s parameters such that a small number of gradient updates will lead to fast learning on a new task.\r\n\r\nConsider a model represented by a parametrized function $f\\_{\\theta}$ with parameters $\\theta$. When adapting to a new task $\\mathcal{T}\\_{i}$, the model’s parameters $\\theta$ become $\\theta'\\_{i}$. With MAML, the updated parameter vector $\\theta'\\_{i}$ is computed using one or more gradient descent updates on task $\\mathcal{T}\\_{i}$. For example, when using one gradient update,\r\n\r\n$$ \\theta'\\_{i} = \\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right) $$\r\n\r\nThe step size $\\alpha$ may be fixed as a hyperparameter or metalearned. The model parameters are trained by optimizing for the performance of $f\\_{\\theta'\\_{i}}$ with respect to $\\theta$ across tasks sampled from $p\\left(\\mathcal{T}\\_{i}\\right)$. More concretely the meta-objective is as follows:\r\n\r\n$$ \\min\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right) = \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right)}\\right) $$\r\n\r\nNote that the meta-optimization is performed over the model parameters $\\theta$, whereas the objective is computed using the updated model parameters $\\theta'$. In effect MAML aims to optimize the model parameters such that one or a small number of gradient steps on a new task will produce maximally effective behavior on that task. The meta-optimization across tasks is performed via stochastic gradient descent (SGD), such that the model parameters $\\theta$ are updated as follows:\r\n\r\n$$ \\theta \\leftarrow \\theta - \\beta\\nabla\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right)$$\r\n\r\nwhere $\\beta$ is the meta step size." + } + } + video: { + video_id: "vUNAJsO2G98" + video_title: "PR-187 : MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks" + number_of_likes: 4 + number_of_views: 507 + published_date: { + seconds: 1565712056 + } + uploader: "Sunghoon Joo" + } + } +} +pr_id_to_video: { + key: 189 + value: { + pr_id: 189 + papers: { + paper_id: "unsupervised-data-augmentation-1" + title: "Unsupervised Data Augmentation for Consistency Training" + arxiv_id: "1904.12848" + abstract: "Semi-supervised learning lately has shown much promise in improving deep learning models when labeled data is scarce. Common among recent approaches is the use of consistency training on a large amount of unlabeled data to constrain model predictions to be invariant to input noise. In this work, we present a new perspective on how to effectively noise unlabeled examples and argue that the quality of noising, specifically those produced by advanced data augmentation methods, plays a crucial role in semi-supervised learning. 
By substituting simple noising operations with advanced data augmentation methods such as RandAugment and back-translation, our method brings substantial improvements across six language and three vision tasks under the same consistency training framework. On the IMDb text classification dataset, with only 20 labeled examples, our method achieves an error rate of 4.20, outperforming the state-of-the-art model trained on 25,000 labeled examples. On a standard semi-supervised learning benchmark, CIFAR-10, our method outperforms all previous approaches and achieves an error rate of 5.43 with only 250 examples. Our method also combines well with transfer learning, e.g., when finetuning from BERT, and yields improvements in high-data regime, such as ImageNet, whether when there is only 10% labeled data or when a full labeled set with 1.3M extra unlabeled examples is used. Code is available at https://github.com/google-research/uda." + pub_date: { + seconds: 1556496000 + } + authors: "Qizhe Xie" + authors: "Zihang Dai" + authors: "Eduard Hovy" + authors: "Minh-Thang Luong" + authors: "Quoc V. Le" + repositories: { + url: "https://github.com/kekmodel/UDA-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "An unofficial PyTorch implementation of Unsupervised Data Augmentation" + } + repositories: { + url: "https://github.com/A-Telfer/AugKey" + framework: FRAMEWORK_OTHERS + description: "RandAugment with Keypoints Annotation Support." + } + repositories: { + url: "https://github.com/rwbfd/OpenCompetitionV2" + framework: FRAMEWORK_PYTORCH + number_of_stars: 40 + description: "This is a collection of convenient methods for data science competition." + } + repositories: { + url: "https://github.com/joannayu25/NLP_Project_MIDS-W266" + framework: FRAMEWORK_TENSORFLOW + description: "Final Project for NLP class in UC Berkeley MIDS Program W266" + } + repositories: { + url: "https://github.com/leblancdaniel/paraphraser" + framework: FRAMEWORK_TENSORFLOW + description: "paraphrasing w/ unsupervised data augmentation (source: https://github.com/google-research/uda)" + } + repositories: { + is_official: true + url: "https://github.com/google-research/uda" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1725 + description: "Unsupervised Data Augmentation (UDA)" + } + repositories: { + url: "https://github.com/SanghunYun/UDA_pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 168 + description: "UDA(Unsupervised Data Augmentation) implemented by pytorch" + } + repositories: { + url: "https://github.com/bhacquin/UDA_pytorch" + framework: FRAMEWORK_PYTORCH + description: "Pytorch version of the algorithm described in Unsupervised Data Augmentation. " + } + repositories: { + url: "https://github.com/PhamNguyen97/TSA_pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 + description: "Training Signal Annealing" + } + repositories: { + url: "https://github.com/tomgoter/nlp_finalproject" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Repository for Final Project for W266: Natural Language Processing with Deep Learning" + } + methods: { + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. 
Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + } + methods: { + name: "WordPiece" + full_name: "WordPiece" + description: "**WordPiece** is a subword segmentation algorithm used in natural language processing. The vocabulary is initialized with individual characters in the language, then the most frequent combinations of symbols in the vocabulary are iteratively added to the vocabulary. The process is:\r\n\r\n1. Initialize the word unit inventory with all the characters in the text.\r\n2. Build a language model on the training data using the inventory from 1.\r\n3. Generate a new word unit by combining two units out of the current word inventory to increment the word unit inventory by one. Choose the new word unit out of all the possible ones that increases the likelihood on the training data the most when added to the model.\r\n4. Goto 2 until a predefined limit of word units is reached or the likelihood increase falls below a certain threshold.\r\n\r\nText: [Source](https://stackoverflow.com/questions/55382596/how-is-wordpiece-tokenization-helpful-to-effectively-deal-with-rare-words-proble/55416944#55416944)\r\n\r\nImage: WordPiece as used in BERT" + } + methods: { + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." 
+ } + methods: { + name: "Residual Block" + full_name: "Residual Block" + description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. 
We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." + } + methods: { + name: "Bottleneck Residual Block" + full_name: "Bottleneck Residual Block" + description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + } + methods: { + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." 
+ } + } + video: { + video_id: "YiKn93Ud4dA" + video_title: "PR-189: Unsupervised Data Augmentation for Consistency Training" + number_of_likes: 17 + number_of_views: 1176 + published_date: { + seconds: 1566745737 + } + uploader: "박성남" + } + } +} +pr_id_to_video: { + key: 190 + value: { + pr_id: 190 + papers: { + paper_id: "a-baseline-for-detecting-misclassified-and" + title: "A Baseline for Detecting Misclassified and Out-of-Distribution Examples in Neural Networks" + arxiv_id: "1610.02136" + abstract: "We consider the two related problems of detecting if an example is\nmisclassified or out-of-distribution. We present a simple baseline that\nutilizes probabilities from softmax distributions. Correctly classified\nexamples tend to have greater maximum softmax probabilities than erroneously\nclassified and out-of-distribution examples, allowing for their detection. We\nassess performance by defining several tasks in computer vision, natural\nlanguage processing, and automatic speech recognition, showing the\neffectiveness of this baseline across all. We then show the baseline can\nsometimes be surpassed, demonstrating the room for future research on these\nunderexplored detection tasks." + pub_date: { + seconds: 1475798400 + } + authors: "Dan Hendrycks" + authors: "Kevin Gimpel" + repositories: { + url: "https://github.com/sooonwoo/RotNet-OOD" + framework: FRAMEWORK_PYTORCH + description: "Self-Supervised Learning for OOD Detection (NeurIPS 2019)" + } + repositories: { + is_official: true + url: "https://github.com/hendrycks/error-detection" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 132 + description: "A Baseline for Detecting Misclassified and Out-of-Distribution Examples in Neural Networks" + } + repositories: { + url: "https://github.com/dabsdamoon/MNIST-Auxiliary-Decoder" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Implemented auxiliary decoder mentioned in the paper 'A Baseline for Detecting Misclassified and Out-of-Distribution Examples in Neural Networks' (https://arxiv.org/abs/1610.02136)" + } + repositories: { + url: "https://github.com/2sang/OOD-baseline" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 + description: "Reproducing experiment result of 'A Baseline for Detecting Misclassified and Out-of-Distribution Examples in Neural Networks', by Hendrycks et al.(2017)" + } + repositories: { + url: "https://github.com/omallo/kaggle-whale" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/guyAmit/GLOD" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Github for the conference paper GLOD-Gaussian Likelihood OOD detector" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. 
Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Residual Block" + full_name: "Residual Block" + description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. 
It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Bottleneck Residual Block" + full_name: "Bottleneck Residual Block" + description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. 
They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + } + } + video: { + video_id: "xaABseUoHAI" + video_title: "PR-190: A Baseline For Detecting Misclassified and Out-of-Distribution Examples In Neural Networks" + number_of_likes: 10 + number_of_views: 1045 + published_date: { + seconds: 1569764236 + } + uploader: "MinGuk Kang" + } + } +} +pr_id_to_video: { + key: 191 + value: { + pr_id: 191 + papers: { + paper_id: "learning-adversarially-fair-and-transferable" + title: "Learning Adversarially Fair and Transferable Representations" + arxiv_id: "1802.06309" + abstract: "In this paper, we advocate for representation learning as the key to\nmitigating unfair prediction outcomes downstream. Motivated by a scenario where\nlearned representations are used by third parties with unknown objectives, we\npropose and explore adversarial representation learning as a natural method of\nensuring those parties act fairly. We connect group fairness (demographic\nparity, equalized odds, and equal opportunity) to different adversarial\nobjectives. Through worst-case theoretical guarantees and experimental\nvalidation, we show that the choice of this objective is crucial to fair\nprediction. Furthermore, we present the first in-depth experimental\ndemonstration of fair transfer learning and demonstrate empirically that our\nlearned representations admit fair predictions on new tasks while maintaining\nutility, an essential goal of fair representation learning." + pub_date: { + seconds: 1518825600 + } + authors: "David Madras" + authors: "Elliot Creager" + authors: "Toniann Pitassi" + authors: "Richard Zemel" + repositories: { + url: "https://github.com/rvr-account/rvr" + framework: FRAMEWORK_OTHERS + description: "Representation via Representations is a project aimed at improving transfer learning to out-of-distribution examples. Motivated by the challenge of finding robust biomedical predictors of disease, the model leverages data from heterogenous sources to discover feature representations that allow for accurate prediction outside of the training data." + } + repositories: { + is_official: true + url: "https://github.com/VectorInstitute/laftr" + framework: FRAMEWORK_OTHERS + number_of_stars: 33 + description: "Learning Adversarially Fair and Transferable Representations" + } + repositories: { + url: "https://github.com/ecreager/laftr" + framework: FRAMEWORK_OTHERS + number_of_stars: 33 + description: "Learning Adversarially Fair and Transferable Representations" + } + } + video: { + video_id: "cgolskL-_WM" + video_title: "PR-191: Learning Adversarially Fair and Transferable Representations" + number_of_views: 380 + published_date: { + seconds: 1567348971 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 192 + value: { + pr_id: 192 + papers: { + paper_id: "mocogan-decomposing-motion-and-content-for" + title: "MoCoGAN: Decomposing Motion and Content for Video Generation" + arxiv_id: "1707.04993" + abstract: "Visual signals in a video can be divided into content and motion. While\ncontent specifies which objects are in the video, motion describes their\ndynamics. Based on this prior, we propose the Motion and Content decomposed\nGenerative Adversarial Network (MoCoGAN) framework for video generation. The\nproposed framework generates a video by mapping a sequence of random vectors to\na sequence of video frames. 
Each random vector consists of a content part and a\nmotion part. While the content part is kept fixed, the motion part is realized\nas a stochastic process. To learn motion and content decomposition in an\nunsupervised manner, we introduce a novel adversarial learning scheme utilizing\nboth image and video discriminators. Extensive experimental results on several\nchallenging datasets with qualitative and quantitative comparison to the\nstate-of-the-art approaches, verify effectiveness of the proposed framework. In\naddition, we show that MoCoGAN allows one to generate videos with same content\nbut different motion as well as videos with different content and same motion." + pub_date: { + seconds: 1500249600 + } + authors: "Sergey Tulyakov" + authors: "Ming-Yu Liu" + authors: "Xiaodong Yang" + authors: "Jan Kautz" + repositories: { + url: "https://github.com/ubc-vision/DwNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 19 + } + repositories: { + url: "https://github.com/DLHacks/mocogan" + framework: FRAMEWORK_PYTORCH + number_of_stars: 93 + description: "A pytorch implemention of MoCoGAN" + } + repositories: { + url: "https://github.com/vaibhavsingh9/MoCoGAN_implementation" + framework: FRAMEWORK_PYTORCH + description: "Learning motion GAN's for video based generations" + } + repositories: { + url: "https://github.com/HappyBahman/ldvdGAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "ldvdGAN, Lower Dimensional Kernels for Video DiscriminatorsLdvdGAN" + } + repositories: { + is_official: true + url: "https://github.com/sergeytulyakov/mocogan" + framework: FRAMEWORK_PYTORCH + number_of_stars: 437 + description: "MoCoGAN: Decomposing Motion and Content for Video Generation" + } + repositories: { + url: "https://github.com/UBC-Computer-Vision-Group/DwNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 19 + } + } + video: { + video_id: "9uNFtnRa_JU" + video_title: "PR-192: MoCoGAN: Decomposing Motion and Content for Video Generation" + number_of_likes: 9 + number_of_views: 1223 + published_date: { + seconds: 1568189938 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 193 + value: { + pr_id: 193 + papers: { + paper_id: "nisp-pruning-networks-using-neuron-importance" + title: "NISP: Pruning Networks using Neuron Importance Score Propagation" + arxiv_id: "1711.05908" + abstract: "To reduce the significant redundancy in deep Convolutional Neural Networks\n(CNNs), most existing methods prune neurons by only considering statistics of\nan individual layer or two consecutive layers (e.g., prune one layer to\nminimize the reconstruction error of the next layer), ignoring the effect of\nerror propagation in deep networks. In contrast, we argue that it is essential\nto prune neurons in the entire neuron network jointly based on a unified goal:\nminimizing the reconstruction error of important responses in the \"final\nresponse layer\" (FRL), which is the second-to-last layer before classification,\nfor a pruned network to retrain its predictive power. Specifically, we apply\nfeature ranking techniques to measure the importance of each neuron in the FRL,\nand formulate network pruning as a binary integer optimization problem and\nderive a closed-form solution to it for pruning neurons in earlier layers.\nBased on our theoretical analysis, we propose the Neuron Importance Score\nPropagation (NISP) algorithm to propagate the importance scores of final\nresponses to every neuron in the network. 
The CNN is pruned by removing neurons\nwith least importance, and then fine-tuned to retain its predictive power. NISP\nis evaluated on several datasets with multiple CNN models and demonstrated to\nachieve significant acceleration and compression with negligible accuracy loss." + pub_date: { + seconds: 1510790400 + } + authors: "Ruichi Yu" + authors: "Ang Li" + authors: "Chun-Fu Chen" + authors: "Jui-Hsin Lai" + authors: "Vlad I. Morariu" + authors: "Xintong Han" + authors: "Mingfei Gao" + authors: "Ching-Yung Lin" + authors: "Larry S. Davis" + } + video: { + video_id: "3KoqN_yYhmI" + video_title: "PR-193: NISP: Pruning Networks using Neural Importance Score Propagation" + number_of_likes: 10 + number_of_views: 566 + published_date: { + seconds: 1567953078 + } + uploader: "taesu" + } + } +} +pr_id_to_video: { + key: 194 + value: { + pr_id: 194 + papers: { + paper_id: "once-for-all-train-one-network-and-specialize" + title: "Once-for-All: Train One Network and Specialize it for Efficient Deployment" + arxiv_id: "1908.09791" + abstract: "We address the challenging problem of efficient inference across many devices and resource constraints, especially on edge devices. Conventional approaches either manually design or use neural architecture search (NAS) to find a specialized neural network and train it from scratch for each case, which is computationally prohibitive (causing $CO_2$ emission as much as 5 cars' lifetime) thus unscalable. In this work, we propose to train a once-for-all (OFA) network that supports diverse architectural settings by decoupling training and search, to reduce the cost. We can quickly get a specialized sub-network by selecting from the OFA network without additional training. To efficiently train OFA networks, we also propose a novel progressive shrinking algorithm, a generalized pruning method that reduces the model size across many more dimensions than pruning (depth, width, kernel size, and resolution). It can obtain a surprisingly large number of sub-networks ($> 10^{19}$) that can fit different hardware platforms and latency constraints while maintaining the same level of accuracy as training independently. On diverse edge devices, OFA consistently outperforms state-of-the-art (SOTA) NAS methods (up to 4.0% ImageNet top1 accuracy improvement over MobileNetV3, or same accuracy but 1.5x faster than MobileNetV3, 2.6x faster than EfficientNet w.r.t measured latency) while reducing many orders of magnitude GPU hours and $CO_2$ emission. In particular, OFA achieves a new SOTA 80.0% ImageNet top-1 accuracy under the mobile setting ($<$600M MACs). OFA is the winning solution for the 3rd Low Power Computer Vision Challenge (LPCVC), DSP classification track and the 4th LPCVC, both classification track and detection track. Code and 50 pre-trained models (for many devices & many latency constraints) are released at https://github.com/mit-han-lab/once-for-all." 
+ pub_date: { + seconds: 1566777600 + } + authors: "Han Cai" + authors: "Chuang Gan" + authors: "Tianzhe Wang" + authors: "Zhekai Zhang" + authors: "Song Han" + repositories: { + url: "https://github.com/seulkiyeom/once-for-all" + framework: FRAMEWORK_PYTORCH + description: "Transformable NAS (based on OFA network)" + } + repositories: { + is_official: true + url: "https://github.com/mit-han-lab/once-for-all" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1249 + description: "[ICLR 2020] Once for All: Train One Network and Specialize it for Efficient Deployment" + } + repositories: { + url: "https://github.com/MIT-HAN-LAB/ProxylessNAS" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1240 + description: "[ICLR 2019] ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware" + } + repositories: { + url: "https://github.com/mit-han-lab/ProxylessNAS" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1240 + description: "[ICLR 2019] ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware" + } + repositories: { + url: "https://github.com/mit-han-lab/lpcvc" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 35 + description: "[LPIRC 2019, ICCV 2019] Winner Solution for 4th LPCVC" + } + } + video: {} + } +} +pr_id_to_video: { + key: 195 + value: { + pr_id: 195 + papers: { + paper_id: "mixmatch-a-holistic-approach-to-semi" + title: "MixMatch: A Holistic Approach to Semi-Supervised Learning" + arxiv_id: "1905.02249" + abstract: "Semi-supervised learning has proven to be a powerful paradigm for leveraging unlabeled data to mitigate the reliance on large labeled datasets. In this work, we unify the current dominant approaches for semi-supervised learning to produce a new algorithm, MixMatch, that works by guessing low-entropy labels for data-augmented unlabeled examples and mixing labeled and unlabeled data using MixUp. We show that MixMatch obtains state-of-the-art results by a large margin across many datasets and labeled data amounts. For example, on CIFAR-10 with 250 labels, we reduce error rate by a factor of 4 (from 38% to 11%) and by a factor of 2 on STL-10. We also demonstrate how MixMatch can help achieve a dramatically better accuracy-privacy trade-off for differential privacy. Finally, we perform an ablation study to tease apart which components of MixMatch are most important for its success." + pub_date: { + seconds: 1557100800 + } + authors: "David Berthelot" + authors: "Nicholas Carlini" + authors: "Ian Goodfellow" + authors: "Nicolas Papernot" + authors: "Avital Oliver" + authors: "Colin Raffel" + repositories: { + url: "https://github.com/google-research/crest" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 + description: "Repo for CReST: A Class-Rebalancing Self-Training Framework for Imbalanced Semi-Supervised Learning" + } + repositories: { + url: "https://github.com/narendoraiswamy/MixMatch-pytorch-demo" + framework: FRAMEWORK_PYTORCH + description: "The execution of tests for mixmatch." 
+ } + repositories: { + url: "https://github.com/DonghwanKIM0101/CS492I_CV" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/ktran1/Manifold-attack" + framework: FRAMEWORK_PYTORCH + description: "This is an implementation of manifold attack" + } + repositories: { + url: "https://github.com/dhx000/DGM_project" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } + repositories: { + url: "https://github.com/ms903-github/MixMatch-imdb" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/yuxi120407/semi-supervised_tensorflow2.0" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "This is an Tensorflow implementation of semi-supervised learning with the following methods: Pseudo-label, Pi_model, VAT, mean_teacher, Mixup, ICT and Mixmatch." + } + repositories: { + url: "https://github.com/rit-git/Snippext_public" + framework: FRAMEWORK_PYTORCH + number_of_stars: 44 + description: "Snippext: Semi-supervised Opinion Mining with Augmented Data" + } + repositories: { + url: "https://github.com/ntozer/mixmatch-tensorflow2.0" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 11 + description: "Implementation of \"MixMatch: A Holistic Approach to Semi-Supervised Learning\" in TensorFlow 2.0" + } + repositories: { + url: "https://github.com/FelixAbrahamsson/mixmatch-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 31 + description: "An implementation of MixMatch with PyTorch" + } + } + video: { + video_id: "ud863JQmUW0" + video_title: "PR-195: MixMatch: A Holistic Approach to Semi-Supervised Learning" + number_of_likes: 21 + number_of_views: 1420 + published_date: { + seconds: 1569160250 + } + uploader: "Jinsung Yoon" + } + } +} +pr_id_to_video: { + key: 196 + value: { + pr_id: 196 + papers: { + paper_id: "stand-alone-self-attention-in-vision-models" + title: "Stand-Alone Self-Attention in Vision Models" + arxiv_id: "1906.05909" + abstract: "Convolutions are a fundamental building block of modern computer vision systems. Recent approaches have argued for going beyond convolutions in order to capture long-range dependencies. These efforts focus on augmenting convolutional models with content-based interactions, such as self-attention and non-local means, to achieve gains on a number of vision tasks. The natural question that arises is whether attention can be a stand-alone primitive for vision models instead of serving as just an augmentation on top of convolutions. In developing and testing a pure self-attention vision model, we verify that self-attention can indeed be an effective stand-alone layer. A simple procedure of replacing all instances of spatial convolutions with a form of self-attention applied to ResNet model produces a fully self-attentional model that outperforms the baseline on ImageNet classification with 12% fewer FLOPS and 29% fewer parameters. On COCO object detection, a pure self-attention model matches the mAP of a baseline RetinaNet while having 39% fewer FLOPS and 34% fewer parameters. Detailed ablation studies demonstrate that self-attention is especially impactful when used in later layers. These results establish that stand-alone self-attention is an important addition to the vision practitioner's toolbox." 
+ pub_date: { + seconds: 1560384000 + } + authors: "Prajit Ramachandran" + authors: "Niki Parmar" + authors: "Ashish Vaswani" + authors: "Irwan Bello" + authors: "Anselm Levskaya" + authors: "Jonathon Shlens" + repositories: { + url: "https://github.com/MartinGer/Stand-Alone-Self-Attention-in-Vision-Models" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Pytorch implementation of the paper Stand-Alone Self-Attention in Vision Models" + } + repositories: { + url: "https://github.com/MaheepChaudhary/Stand-Alone_Self-Attention" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Implemented the Stand-Alone Self-Attention research paper form scratch in Tensorflow" + } + repositories: { + url: "https://github.com/The-AI-Summer/self_attention" + framework: FRAMEWORK_PYTORCH + number_of_stars: 449 + description: "Implementation of various self-attention mechanisms focused on computer vision. Ongoing repository. " + } + repositories: { + url: "https://github.com/leaderj1001/Stand-Alone-Self-Attention" + framework: FRAMEWORK_PYTORCH + number_of_stars: 343 + description: "Implementing Stand-Alone Self-Attention in Vision Models using Pytorch" + } + repositories: { + is_official: true + url: "https://github.com/google-research/google-research" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 18081 + description: "Google Research" + } + repositories: { + url: "https://github.com/JoeRoussy/adaptive-attention-in-cv" + framework: FRAMEWORK_PYTORCH + number_of_stars: 24 + description: "Implementation for our paper exploring a novel 2D adaptive attention span kernel in computer vision." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Residual Block" + full_name: "Residual Block" + description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. 
Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." + } + methods: { + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + } + methods: { + name: "Focal Loss" + full_name: "Focal Loss" + description: "A **Focal Loss** function addresses class imbalance during training in tasks like object detection. Focal loss applies a modulating term to the cross entropy loss in order to focus learning on hard negative examples. It is a dynamically scaled cross entropy loss, where the scaling factor decays to zero as confidence in the correct class increases. Intuitively, this scaling factor can automatically down-weight the contribution of easy examples during training and rapidly focus the model on hard examples. \r\n\r\nFormally, the Focal Loss adds a factor $(1 - p\\_{t})^\\gamma$ to the standard cross entropy criterion. Setting $\\gamma>0$ reduces the relative loss for well-classified examples ($p\\_{t}>.5$), putting more focus on hard, misclassified examples. Here there is tunable *focusing* parameter $\\gamma \\ge 0$. \r\n\r\n$$ {\\text{FL}(p\\_{t}) = - (1 - p\\_{t})^\\gamma \\log\\left(p\\_{t}\\right)} $$" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "RetinaNet" + full_name: "RetinaNet" + description: "**RetinaNet** is a one-stage object detection model that utilizes a focal loss function to address class imbalance during training. Focal loss applies a modulating term to the cross entropy loss in order to focus learning on hard negative examples. RetinaNet is a single, unified network composed of a *backbone* network and two task-specific *subnetworks*. The backbone is responsible for computing a convolutional feature map over an entire input image and is an off-the-self convolutional network. The first subnet performs convolutional object classification on the backbone's output; the second subnet performs convolutional bounding box regression. The two subnetworks feature a simple design that the authors propose specifically for one-stage, dense detection. \r\n\r\nWe can see the motivation for focal loss by comparing with two-stage object detectors. Here class imbalance is addressed by a two-stage cascade and sampling heuristics. 
The proposal stage (e.g., [Selective Search](https://paperswithcode.com/method/selective-search), [EdgeBoxes](https://paperswithcode.com/method/edgeboxes), [DeepMask](https://paperswithcode.com/method/deepmask), [RPN](https://paperswithcode.com/method/rpn)) rapidly narrows down the number of candidate object locations to a small number (e.g., 1-2k), filtering out most background samples. In the second classification stage, sampling heuristics, such as a fixed foreground-to-background ratio, or online hard example mining ([OHEM](https://paperswithcode.com/method/ohem)), are performed to maintain a\r\nmanageable balance between foreground and background.\r\n\r\nIn contrast, a one-stage detector must process a much larger set of candidate object locations regularly sampled across an image. To tackle this, RetinaNet uses a focal loss function, a dynamically scaled cross entropy loss, where the scaling factor decays to zero as confidence in the correct class increases. Intuitively, this scaling factor can automatically down-weight the contribution of easy examples during training and rapidly focus the model on hard examples. \r\n\r\nFormally, the Focal Loss adds a factor $(1 - p\\_{t})^\\gamma$ to the standard cross entropy criterion. Setting $\\gamma>0$ reduces the relative loss for well-classified examples ($p\\_{t}>.5$), putting more focus on hard, misclassified examples. Here there is tunable *focusing* parameter $\\gamma \\ge 0$. \r\n\r\n$$ {\\text{FL}(p\\_{t}) = - (1 - p\\_{t})^\\gamma \\log\\left(p\\_{t}\\right)} $$" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. 
This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "FPN" + full_name: "Feature Pyramid Network" + description: "A **Feature Pyramid Network**, or **FPN**, is a feature extractor that takes a single-scale image of an arbitrary size as input, and outputs proportionally sized feature maps at multiple levels, in a fully convolutional fashion. This process is independent of the backbone convolutional architectures. It therefore acts as a generic solution for building feature pyramids inside deep convolutional networks to be used in tasks like object detection.\r\n\r\nThe construction of the pyramid involves a bottom-up pathway and a top-down pathway.\r\n\r\nThe bottom-up pathway is the feedforward computation of the backbone ConvNet, which computes a feature hierarchy consisting of feature maps at several scales with a scaling step of 2. For the feature\r\npyramid, one pyramid level is defined for each stage. The output of the last layer of each stage is used as a reference set of feature maps. For [ResNets](https://paperswithcode.com/method/resnet) we use the feature activations output by each stage’s last residual block. \r\n\r\nThe top-down pathway hallucinates higher resolution features by upsampling spatially coarser, but semantically stronger, feature maps from higher pyramid levels. These features are then enhanced with features from the bottom-up pathway via lateral connections. Each lateral connection merges feature maps of the same spatial size from the bottom-up pathway and the top-down pathway. The bottom-up feature map is of lower-level semantics, but its activations are more accurately localized as it was subsampled fewer times." + } + } + video: { + video_id: "6hadVw4Sy2M" + video_title: "PR-196: Stand Alone Self Attention in Vision Models" + number_of_likes: 9 + number_of_views: 1559 + published_date: { + seconds: 1571072079 + } + uploader: "정지원" + } + } +} +pr_id_to_video: { + key: 197 + value: { + pr_id: 197 + papers: { + paper_id: "one-ticket-to-win-them-all-generalizing" + title: "One ticket to win them all: generalizing lottery ticket initializations across datasets and optimizers" + arxiv_id: "1906.02773" + abstract: "The success of lottery ticket initializations (Frankle and Carbin, 2019) suggests that small, sparsified networks can be trained so long as the network is initialized appropriately. Unfortunately, finding these \"winning ticket\" initializations is computationally expensive. One potential solution is to reuse the same winning tickets across a variety of datasets and optimizers. However, the generality of winning ticket initializations remains unclear. Here, we attempt to answer this question by generating winning tickets for one training configuration (optimizer and dataset) and evaluating their performance on another configuration. 
Perhaps surprisingly, we found that, within the natural images domain, winning ticket initializations generalized across a variety of datasets, including Fashion MNIST, SVHN, CIFAR-10/100, ImageNet, and Places365, often achieving performance close to that of winning tickets generated on the same dataset. Moreover, winning tickets generated using larger datasets consistently transferred better than those generated using smaller datasets. We also found that winning ticket initializations generalize across optimizers with high performance. These results suggest that winning ticket initializations generated by sufficiently large datasets contain inductive biases generic to neural networks more broadly which improve training across many settings and provide hope for the development of better initialization methods." + pub_date: { + seconds: 1559779200 + } + authors: "Ari S. Morcos" + authors: "Haonan Yu" + authors: "Michela Paganini" + authors: "Yuandong Tian" + repositories: { + url: "https://github.com/varungohil/Generalizing-Lottery-Tickets" + framework: FRAMEWORK_PYTORCH + number_of_stars: 42 + description: "This repository contains code to replicate the experiments given in NeurIPS 2019 paper \"One ticket to win them all: generalizing lottery ticket initializations across datasets and optimizers\"" + } + } + video: { + video_id: "YmTNpF2OOjA" + video_title: "PR-197: One ticket to win them all: generalizing lottery ticket initialization" + number_of_likes: 21 + number_of_views: 1014 + published_date: { + seconds: 1569769625 + } + uploader: "JinWon Lee" + } + } +} +pr_id_to_video: { + key: 198 + value: { + pr_id: 198 + papers: { + paper_id: "temporal-shift-module-for-efficient-video" + title: "TSM: Temporal Shift Module for Efficient Video Understanding" + arxiv_id: "1811.08383" + abstract: "The explosive growth in video streaming gives rise to challenges on performing video understanding at high accuracy and low computation cost. Conventional 2D CNNs are computationally cheap but cannot capture temporal relationships; 3D CNN based methods can achieve good performance but are computationally intensive, making it expensive to deploy. In this paper, we propose a generic and effective Temporal Shift Module (TSM) that enjoys both high efficiency and high performance. Specifically, it can achieve the performance of 3D CNN but maintain 2D CNN's complexity. TSM shifts part of the channels along the temporal dimension; thus facilitate information exchanged among neighboring frames. It can be inserted into 2D CNNs to achieve temporal modeling at zero computation and zero parameters. We also extended TSM to online setting, which enables real-time low-latency online video recognition and video object detection. TSM is accurate and efficient: it ranks the first place on the Something-Something leaderboard upon publication; on Jetson Nano and Galaxy Note8, it achieves a low latency of 13ms and 35ms for online video recognition. The code is available at: https://github.com/mit-han-lab/temporal-shift-module." 
+ pub_date: { + seconds: 1542672000 + } + authors: "Ji Lin" + authors: "Chuang Gan" + authors: "Song Han" + repositories: { + url: "https://github.com/open-mmlab/mmaction2" + framework: FRAMEWORK_PYTORCH + number_of_stars: 939 + description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" + } + repositories: { + url: "https://github.com/rijuldhir/TSM" + framework: FRAMEWORK_PYTORCH + } + repositories: { + is_official: true + url: "https://github.com/MIT-HAN-LAB/temporal-shift-module" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1384 + description: "[ICCV 2019] TSM: Temporal Shift Module for Efficient Video Understanding" + } + repositories: { + url: "https://github.com/WavesUR/embedded_TSM" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "cs231n project" + } + repositories: { + url: "https://github.com/PaParaZz1/TemporalShiftModule" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "pytorch implementation for TemporalShiftModule" + } + repositories: { + url: "https://github.com/niveditarahurkar/CS231N-ActionRecognition" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Course Project for Stanford CS231n Convolutional Neural Networks for Visual Recognition" + } + } + video: {} + } +} +pr_id_to_video: { + key: 199 + value: { + pr_id: 199 + papers: { + paper_id: "sniper-efficient-multi-scale-training" + title: "SNIPER: Efficient Multi-Scale Training" + arxiv_id: "1805.09300" + abstract: "We present SNIPER, an algorithm for performing efficient multi-scale training\nin instance level visual recognition tasks. Instead of processing every pixel\nin an image pyramid, SNIPER processes context regions around ground-truth\ninstances (referred to as chips) at the appropriate scale. For background\nsampling, these context-regions are generated using proposals extracted from a\nregion proposal network trained with a short learning schedule. Hence, the\nnumber of chips generated per image during training adaptively changes based on\nthe scene complexity. SNIPER only processes 30% more pixels compared to the\ncommonly used single scale training at 800x1333 pixels on the COCO dataset.\nBut, it also observes samples from extreme resolutions of the image pyramid,\nlike 1400x2000 pixels. As SNIPER operates on resampled low resolution chips\n(512x512 pixels), it can have a batch size as large as 20 on a single GPU even\nwith a ResNet-101 backbone. Therefore it can benefit from batch-normalization\nduring training without the need for synchronizing batch-normalization\nstatistics across GPUs. SNIPER brings training of instance level recognition\ntasks like object detection closer to the protocol for image classification and\nsuggests that the commonly accepted guideline that it is important to train on\nhigh resolution images for instance level visual recognition tasks might not be\ncorrect. Our implementation based on Faster-RCNN with a ResNet-101 backbone\nobtains an mAP of 47.6% on the COCO dataset for bounding box detection and can\nprocess 5 images per second during inference with a single GPU. Code is\navailable at https://github.com/MahyarNajibi/SNIPER/." + pub_date: { + seconds: 1527033600 + } + authors: "Bharat Singh" + authors: "Mahyar Najibi" + authors: "Larry S. 
Davis" + repositories: { + url: "https://github.com/starimpact/arm_SNIPER" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 + description: "sniper version for arm tiny input and network training." + } + repositories: { + url: "https://github.com/Hwang64/PSIS" + framework: FRAMEWORK_OTHERS + number_of_stars: 75 + description: "Data Augmentation for Object Detection via Progressive and Selective Instance-Switching" + } + repositories: { + is_official: true + url: "https://github.com/MahyarNajibi/SNIPER" + framework: FRAMEWORK_OTHERS + number_of_stars: 2643 + description: "SNIPER / AutoFocus is an efficient multi-scale object detection training / inference algorithm" + } + methods: { + name: "ResNet" + full_name: "Residual Network" + description: "**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. Instead of hoping each few stacked layers directly fit a desired underlying mapping, residual nets let these layers fit a residual mapping. They stack [residual blocks](https://paperswithcode.com/method/residual-block) ontop of each other to form network: e.g. a ResNet-50 has fifty layers using these blocks. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}(x)$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}(x):=\\mathcal{H}(x)-x$. The original mapping is recast into $\\mathcal{F}(x)+x$.\r\n\r\nThere is empirical evidence that these types of network are easier to optimize, and can gain accuracy from considerably increased depth." + } + methods: { + name: "RPN" + full_name: "Region Proposal Network" + description: "A **Region Proposal Network**, or **RPN**, is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals. RPN and algorithms like Fast R-CNN can be merged into a single network by sharing their convolutional features - using the recently popular terminology of neural networks with attention mechanisms, the RPN component tells the unified network where to look.\r\n\r\nRPNs are designed to efficiently predict region proposals with a wide range of scales and aspect ratios. RPNs use anchor boxes that serve as references at multiple scales and aspect ratios. The scheme can be thought of as a pyramid of regression references, which avoids enumerating images or filters of multiple scales or aspect ratios." + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. 
It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "RoIPool" + full_name: "RoIPool" + description: "**Region of Interest Pooling**, or **RoIPool**, is an operation for extracting a small feature map (e.g., $7×7$) from each RoI in detection and segmentation based tasks. Features are extracted from each candidate box, and thereafter in models like Fast R-CNN, are then classified and bounding box regression performed.\r\n\r\nThe actual scaling to, e.g., $7×7$, occurs by dividing the region proposal into equally sized sections, finding the largest value in each section, and then copying these max values to the output buffer. In essence, **RoIPool** is max pooling on a discrete grid based on a box.\r\n\r\nImage Source: [Joyce Xu](https://towardsdatascience.com/deep-learning-for-object-detection-a-comprehensive-review-73930816d8d9)" + } + methods: { + name: "Faster R-CNN" + full_name: "Faster R-CNN" + description: "**Faster R-CNN** is an object detection model that improves on [Fast R-CNN](https://paperswithcode.com/method/fast-r-cnn) by utilising a region proposal network ([RPN](https://paperswithcode.com/method/rpn)) with the CNN model. The RPN shares full-image convolutional features with the detection network, enabling nearly cost-free region proposals. It is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals, which are used by [Fast R-CNN](https://paperswithcode.com/method/fast-r-cnn) for detection. RPN and Fast R-CNN are merged into a single network by sharing their convolutional features: the RPN component tells the unified network where to look.\r\n\r\nAs a whole, Faster R-CNN consists of two modules. The first module is a deep fully convolutional network that proposes regions, and the second module is the Fast R-CNN detector that uses the proposed regions." + } + methods: { + name: "SNIPER" + full_name: "SNIPER" + description: "**SNIPER** is a multi-scale training approach for instance-level recognition tasks like object detection and instance-level segmentation. 
Instead of processing all pixels in an image pyramid, SNIPER selectively processes context regions around the ground-truth objects (a.k.a chips). This can help to speed up multi-scale training as it operates on low-resolution chips. Due to its memory-efficient design, SNIPER can benefit from Batch Normalization during training and it makes larger batch-sizes possible for instance-level recognition tasks on a single GPU." + } + methods: { + name: "Bottleneck Residual Block" + full_name: "Bottleneck Residual Block" + description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + } + video: { + video_id: "EkndN7svgUk" + video_title: "PR-199: SNIPER:Efficient Multi Scale Training" + number_of_likes: 16 + number_of_views: 1384 + published_date: { + seconds: 1570377571 + } + uploader: "visionNoob" + } + } +} +pr_id_to_video: { + key: 200 + value: { + pr_id: 200 + papers: { + paper_id: "online-model-distillation-for-efficient-video" + title: "Online Model Distillation for Efficient Video Inference" + arxiv_id: "1812.02699" + abstract: "High-quality computer vision models typically address the problem of understanding the general distribution of real-world images. However, most cameras observe only a very small fraction of this distribution. This offers the possibility of achieving more efficient inference by specializing compact, low-cost models to the specific distribution of frames observed by a single camera. 
In this paper, we employ the technique of model distillation (supervising a low-cost student model using the output of a high-cost teacher) to specialize accurate, low-cost semantic segmentation models to a target video stream. Rather than learn a specialized student model on offline data from the video stream, we train the student in an online fashion on the live video, intermittently running the teacher to provide a target for learning. Online model distillation yields semantic segmentation models that closely approximate their Mask R-CNN teacher with 7 to 17$\\times$ lower inference runtime cost (11 to 26$\\times$ in FLOPs), even when the target video's distribution is non-stationary. Our method requires no offline pretraining on the target video stream, achieves higher accuracy and lower cost than solutions based on flow or video object segmentation, and can exhibit better temporal stability than the original teacher. We also provide a new video dataset for evaluating the efficiency of inference over long running video streams." + pub_date: { + seconds: 1544054400 + } + authors: "Ravi Teja Mullapudi" + authors: "Steven Chen" + authors: "Keyi Zhang" + authors: "Deva Ramanan" + authors: "Kayvon Fatahalian" + repositories: { + url: "https://github.com/josephch405/jit-masker" + framework: FRAMEWORK_PYTORCH + number_of_stars: 18 + } + methods: { + name: "Mask R-CNN" + full_name: "Mask R-CNN" + description: "**Mask R-CNN** extends [Faster R-CNN](http://paperswithcode.com/method/faster-r-cnn) to solve instance segmentation tasks. It achieves this by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. In principle, Mask R-CNN is an intuitive extension of Faster R-CNN, but constructing the mask branch properly is critical for good results. \r\n\r\nMost importantly, Faster R-CNN was not designed for pixel-to-pixel alignment between network inputs and outputs. This is evident in how [RoIPool](http://paperswithcode.com/method/roi-pooling), the *de facto* core operation for attending to instances, performs coarse spatial quantization for feature extraction. To fix the misalignment, Mask R-CNN utilises a simple, quantization-free layer, called [RoIAlign](http://paperswithcode.com/method/roi-align), that faithfully preserves exact spatial locations. \r\n\r\nSecondly, Mask R-CNN *decouples* mask and class prediction: it predicts a binary mask for each class independently, without competition among classes, and relies on the network's RoI classification branch to predict the category. In contrast, an [FCN](http://paperswithcode.com/method/fcn) usually perform per-pixel multi-class categorization, which couples segmentation and classification." + } + methods: { + name: "RoIAlign" + full_name: "RoIAlign" + description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." 
+ } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } + } + video: { + video_id: "BHEncY-f548" + video_title: "PR-200: Online Model Distillation for Efficient Video Inference" + number_of_likes: 16 + number_of_views: 714 + published_date: { + seconds: 1571035103 + } + uploader: "Soyeon Kim" + } + } +} +pr_id_to_video: { + key: 201 + value: { + pr_id: 201 + papers: { + paper_id: "bag-of-tricks-for-image-classification-with" + title: "Bag of Tricks for Image Classification with Convolutional Neural Networks" + arxiv_id: "1812.01187" + abstract: "Much of the recent progress made in image classification research can be\ncredited to training procedure refinements, such as changes in data\naugmentations and optimization methods. In the literature, however, most\nrefinements are either briefly mentioned as implementation details or only\nvisible in source code. In this paper, we will examine a collection of such\nrefinements and empirically evaluate their impact on the final model accuracy\nthrough ablation study. We will show that, by combining these refinements\ntogether, we are able to improve various CNN models significantly. For example,\nwe raise ResNet-50's top-1 validation accuracy from 75.3% to 79.29% on\nImageNet. We will also demonstrate that improvement on image classification\naccuracy leads to better transfer learning performance in other application\ndomains such as object detection and semantic segmentation." + pub_date: { + seconds: 1543881600 + } + authors: "Tong He" + authors: "Zhi Zhang" + authors: "Hang Zhang" + authors: "Zhongyue Zhang" + authors: "Junyuan Xie" + authors: "Mu Li" + repositories: { + url: "https://github.com/Tirth27/Skin-Cancer-Classification-using-Deep-Learning" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Classify Skin cancer from the skin lesion images using Image classification. The dataset for the project is obtained from the Kaggle SIIM-ISIC-Melanoma-Classification competition. 
" + } + repositories: { + url: "https://github.com/Media-Smart/vedaseg" + framework: FRAMEWORK_PYTORCH + number_of_stars: 382 + description: "A semantic segmentation toolbox based on PyTorch" + } + repositories: { + url: "https://github.com/seermer/TensorFlow2-EfficientNetV2" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "a TensorFlow2(keras model) implementation of EfficientNetV2" + } + repositories: { + url: "https://github.com/rwightman/pytorch-image-models" + framework: FRAMEWORK_PYTORCH + number_of_stars: 11097 + description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + } + repositories: { + url: "https://github.com/qingyuanchen1997/Bag-of-Tricks" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "The reproduction of paper \"Bag of Tricks for Image Classification with Convolutional Neural Networks\" (based on Pyorch)" + } + repositories: { + url: "https://github.com/jameswang287/Car-Detection" + framework: FRAMEWORK_PYTORCH + description: "Using the Stanford cars dataset and PyTorch/Resnet-34 to predict a car's make and model." + } + repositories: { + is_official: true + url: "https://github.com/dmlc/gluon-cv" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4788 + description: "Gluon CV Toolkit" + } + repositories: { + url: "https://github.com/sherdencooper/tricks-in-deeplearning" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 18 + description: "Using different tricks to improve performance of resetnet. The final accuracy:95.21%" + } + repositories: { + url: "https://github.com/PaddlePaddle/models" + framework: FRAMEWORK_OTHERS + number_of_stars: 6002 + description: "Pre-trained and Reproduced Deep Learning Models (『飞桨』官方模型库,包含多种学术前沿和工业场景验证的深度学习模型)" + } + repositories: { + url: "https://github.com/Dmitrsl/Tools" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + methods: { + name: "Nesterov Accelerated Gradient" + full_name: "Nesterov Accelerated Gradient" + description: "**Nesterov Accelerated Gradient** is a momentum-based SGD optimizer that \"looks ahead\" to where the parameters will be to calculate the gradient **ex post** rather than **ex ante**:\r\n\r\n$$ v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta-\\gamma{v\\_{t-1}}\\right) $$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} + v\\_{t}$$\r\n\r\nLike SGD with momentum $\\gamma$ is usually set to $0.9$.\r\n\r\nThe intuition is that the [standard momentum](https://paperswithcode.com/method/sgd-with-momentum) method first computes the gradient at the current location and then takes a big jump in the direction of the updated accumulated gradient. In contrast Nesterov momentum first makes a big jump in the direction of the previous accumulated gradient and then measures the gradient where it ends up and makes a correction. The idea being that it is better to correct a mistake after you have made it. \r\n\r\nImage Source: [Geoff Hinton lecture notes](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)" + } + methods: { + name: "Mixup" + full_name: "Mixup" + description: "**Mixup** is a data augmentation technique that that generates a weighted combinations of random image pairs from the training data. 
Given two images and their ground truth labels: $\\left(x\\_{i}, y\\_{i}\\right), \\left(x\\_{j}, y\\_{j}\\right)$, a synthetic training example $\\left(\\hat{x}, \\hat{y}\\right)$ is generated as:\r\n\r\n$$ \\hat{x} = \\lambda{x\\_{i}} + \\left(1 − \\lambda\\right){x\\_{j}} $$\r\n$$ \\hat{y} = \\lambda{y\\_{i}} + \\left(1 − \\lambda\\right){y\\_{j}} $$\r\n\r\nwhere $\\lambda \\sim \\text{Beta}\\left(\\alpha = 0.2\\right)$ is independently sampled for each augmented example." + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Cosine Annealing" + full_name: "Cosine Annealing" + description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\\eta\\_{t} = \\eta\\_{min}^{i} + \\frac{1}{2}\\left(\\eta\\_{max}^{i}-\\eta\\_{min}^{i}\\right)\\left(1+\\cos\\left(\\frac{T\\_{cur}}{T\\_{i}}\\pi\\right)\\right)\r\n$$\r\n\r\nWhere where $\\eta\\_{min}^{i}$ and $ \\eta\\_{max}^{i}$ are ranges for the learning rate, and $T\\_{cur}$ account for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" + } + methods: { + name: "Random Horizontal Flip" + full_name: "Random Horizontal Flip" + description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + } + methods: { + name: "Label Smoothing" + full_name: "Label Smoothing" + description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. 
Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Bottleneck Residual Block" + full_name: "Bottleneck Residual Block" + description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "ResNet-D" + full_name: "ResNet-D" + description: "**ResNet-D** is a modification on the [ResNet](https://paperswithcode.com/method/resnet) architecture that utilises an average pooling tweak for downsampling. 
The motivation is that in the unmodified ResNet, the 1 × 1 convolution for the downsampling block ignores 3/4 of input feature maps, so this is modified so no information will be ignored" + } + } + video: { + video_id: "D-baIgejA4M" + video_title: "PR-201: Bag of Tricks for Image Classification with Convolutional Neural Networks" + number_of_likes: 47 + number_of_views: 8754 + published_date: { + seconds: 1571580127 + } + uploader: "Jiyang Kang" + } + } +} +pr_id_to_video: { + key: 202 + value: { + pr_id: 202 + papers: { + paper_id: "tafe-net-task-aware-feature-embeddings-for" + title: "Deep Mixture of Experts via Shallow Embedding" + arxiv_id: "1806.01531" + abstract: "Larger networks generally have greater representational power at the cost of\nincreased computational complexity. Sparsifying such networks has been an\nactive area of research but has been generally limited to static regularization\nor dynamic approaches using reinforcement learning. We explore a mixture of\nexperts (MoE) approach to deep dynamic routing, which activates certain experts\nin the network on a per-example basis. Our novel DeepMoE architecture increases\nthe representational power of standard convolutional networks by adaptively\nsparsifying and recalibrating channel-wise features in each convolutional\nlayer. We employ a multi-headed sparse gating network to determine the\nselection and scaling of channels for each input, leveraging exponential\ncombinations of experts within a single convolutional network. Our proposed\narchitecture is evaluated on four benchmark datasets and tasks, and we show\nthat Deep-MoEs are able to achieve higher accuracy with lower computation than\nstandard convolutional networks." + pub_date: { + seconds: 1528156800 + } + authors: "Xin Wang" + authors: "Fisher Yu" + authors: "Lisa Dunlap" + authors: "Yi-An Ma" + authors: "Ruth Wang" + authors: "Azalia Mirhoseini" + authors: "Trevor Darrell" + authors: "Joseph E. Gonzalez" + } + video: { + video_id: "iR7T3lH20gI" + video_title: "PR-202: Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts" + number_of_likes: 14 + number_of_views: 1045 + published_date: { + seconds: 1571582247 + } + uploader: "박성남" + } + } +} +pr_id_to_video: { + key: 203 + value: { + pr_id: 203 + papers: { + paper_id: "class-balanced-loss-based-on-effective-number" + title: "Class-Balanced Loss Based on Effective Number of Samples" + arxiv_id: "1901.05555" + abstract: "With the rapid increase of large-scale, real-world datasets, it becomes\ncritical to address the problem of long-tailed data distribution (i.e., a few\nclasses account for most of the data, while most classes are\nunder-represented). Existing solutions typically adopt class re-balancing\nstrategies such as re-sampling and re-weighting based on the number of\nobservations for each class. In this work, we argue that as the number of\nsamples increases, the additional benefit of a newly added data point will\ndiminish. We introduce a novel theoretical framework to measure data overlap by\nassociating with each sample a small neighboring region rather than a single\npoint. The effective number of samples is defined as the volume of samples and\ncan be calculated by a simple formula $(1-\\beta^{n})/(1-\\beta)$, where $n$ is\nthe number of samples and $\\beta \\in [0,1)$ is a hyperparameter. We design a\nre-weighting scheme that uses the effective number of samples for each class to\nre-balance the loss, thereby yielding a class-balanced loss. 
Comprehensive\nexperiments are conducted on artificially induced long-tailed CIFAR datasets\nand large-scale datasets including ImageNet and iNaturalist. Our results show\nthat when trained with the proposed class-balanced loss, the network is able to\nachieve significant performance gains on long-tailed datasets." + pub_date: { + seconds: 1547596800 + } + authors: "Yin Cui" + authors: "Menglin Jia" + authors: "Tsung-Yi Lin" + authors: "Yang Song" + authors: "Serge Belongie" + repositories: { + url: "https://github.com/tiagoCuervo/JapaNet" + framework: FRAMEWORK_TENSORFLOW + description: "Detection and classification of Kuzushiji characters for the Kuzushiji Recognition Kaggle challenge using CenterNet as detector and multiple classifiers" + } + repositories: { + is_official: true + url: "https://github.com/richardaecn/class-balanced-loss" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 447 + description: "Class-Balanced Loss Based on Effective Number of Samples. CVPR 2019" + } + repositories: { + url: "https://github.com/frgfm/Holocron" + framework: FRAMEWORK_PYTORCH + number_of_stars: 115 + description: "PyTorch implementations of recent Computer Vision tricks (ReXNet, RepVGG, Unet3p, YOLOv4, CIoU loss, AdaBelief)" + } + repositories: { + url: "https://github.com/vandit15/Class-balanced-loss-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 510 + description: "Pytorch implementation of the paper \"Class-Balanced Loss Based on Effective Number of Samples\"" + } + repositories: { + url: "https://github.com/statsu1990/yoto_class_balanced_loss" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 + description: "Unofficial implementation of YOTO (You Only Train Once) applied to Class balanced loss" + } + repositories: { + url: "https://github.com/feidfoe/AdjustBnd4Imbalance" + framework: FRAMEWORK_PYTORCH + number_of_stars: 15 + description: "Adjust Decision Boundary for Class Imbalanced Learning" + } + } + video: { + video_id: "3hL0uVtJrXM" + video_title: "PR-203 : Class-Balanced Loss Based on Effective Number of Samples" + number_of_likes: 15 + number_of_views: 1271 + published_date: { + seconds: 1572183724 + } + uploader: "Sunghoon Joo" + } + } +} +pr_id_to_video: { + key: 204 + value: { + pr_id: 204 + papers: { + paper_id: "learning-deep-representations-by-mutual" + title: "Learning deep representations by mutual information estimation and maximization" + arxiv_id: "1808.06670" + abstract: "In this work, we perform unsupervised learning of representations by\nmaximizing mutual information between an input and the output of a deep neural\nnetwork encoder. Importantly, we show that structure matters: incorporating\nknowledge about locality of the input to the objective can greatly influence a\nrepresentation's suitability for downstream tasks. We further control\ncharacteristics of the representation by matching to a prior distribution\nadversarially. Our method, which we call Deep InfoMax (DIM), outperforms a\nnumber of popular unsupervised learning methods and competes with\nfully-supervised learning on several classification tasks. DIM opens new\navenues for unsupervised learning of representations and is an important step\ntowards flexible formulations of representation-learning objectives for\nspecific end-goals." 
+ pub_date: { + seconds: 1534723200 + } + authors: "R Devon Hjelm" + authors: "Alex Fedorov" + authors: "Samuel Lavoie-Marchildon" + authors: "Karan Grewal" + authors: "Phil Bachman" + authors: "Adam Trischler" + authors: "Yoshua Bengio" + repositories: { + url: "https://github.com/jqhoogland/rgpy" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Renormalization Group techniques implemented in python with special emphasis on Machine Learning-inspired methods." + } + repositories: { + url: "https://github.com/jtlai0921/infomax" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/HolenYHR/Deepinfo_pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "a pytorch implementation of deepinfo(Learning deep representations by mutual information estimation and maximization)" + } + repositories: { + url: "https://github.com/bojone/infomax" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 118 + description: "extract features by maximizing mutual information" + } + repositories: { + is_official: true + url: "https://github.com/rdevon/DIM" + framework: FRAMEWORK_PYTORCH + number_of_stars: 648 + description: "Deep InfoMax (DIM), or \"Learning Deep Representations by Mutual Information Estimation and Maximization\"" + } + repositories: { + url: "https://github.com/DuaneNielsen/DeepInfomaxPytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 233 + description: "Learning deep representations by mutual information estimation and maximization" + } + repositories: { + url: "https://github.com/createamind/DIM_Commented" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/ifding/simple-Infomax-pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "This is a simple pytorch implementation of Deep-INFOMAX" + } + repositories: { + url: "https://github.com/schzhu/learning-adversarially-robust-representations" + framework: FRAMEWORK_PYTORCH + number_of_stars: 12 + description: "Code for the paper: Learning Adversarially Robust Representations via Worst-Case Mutual Information Maximization (https://arxiv.org/abs/2002.11798)" + } + } + video: { + video_id: "YNicvevmByo" + video_title: "PR-204: Learning deep representations by mutual information estimation and maximization" + number_of_likes: 30 + number_of_views: 2475 + published_date: { + seconds: 1572789342 + } + uploader: "SeongOk Ryu" + } + } +} +pr_id_to_video: { + key: 205 + value: { + pr_id: 205 + papers: { + paper_id: "a-closer-look-at-few-shot-classification-1" + title: "A Closer Look at Few-shot Classification" + arxiv_id: "1904.04232" + abstract: "Few-shot classification aims to learn a classifier to recognize unseen classes during training with limited labeled examples. While significant progress has been made, the growing complexity of network designs, meta-learning algorithms, and differences in implementation details make a fair comparison difficult. 
In this paper, we present 1) a consistent comparative analysis of several representative few-shot classification algorithms, with results showing that deeper backbones significantly reduce the performance differences among methods on datasets with limited domain differences, 2) a modified baseline method that surprisingly achieves competitive performance when compared with the state-of-the-art on both the mini-ImageNet and the CUB datasets, and 3) a new experimental setting for evaluating the cross-domain generalization ability for few-shot classification algorithms. Our results reveal that reducing intra-class variation is an important factor when the feature backbone is shallow, but not as critical when using deeper backbones. In a realistic cross-domain evaluation setting, we show that a baseline method with a standard fine-tuning practice compares favorably against other state-of-the-art few-shot learning algorithms." + pub_date: { + seconds: 1554681600 + } + authors: "Wei-Yu Chen" + authors: "Yen-Cheng Liu" + authors: "Zsolt Kira" + authors: "Yu-Chiang Frank Wang" + authors: "Jia-Bin Huang" + repositories: { + url: "https://github.com/mikehuisman/revisiting-learned-optimizers" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/yinboc/few-shot-meta-baseline" + framework: FRAMEWORK_PYTORCH + number_of_stars: 301 + description: "A New Meta-Baseline for Few-Shot Learning" + } + repositories: { + url: "https://github.com/cyvius96/few-shot-meta-baseline" + framework: FRAMEWORK_PYTORCH + number_of_stars: 301 + description: "A New Meta-Baseline for Few-Shot Learning" + } + repositories: { + is_official: true + url: "https://github.com/wyharveychen/CloserLookFewShot" + framework: FRAMEWORK_PYTORCH + number_of_stars: 837 + description: "source code to ICLR'19, 'A Closer Look at Few-shot Classification' " + } + } + video: { + video_id: "yyqZ1K5u2_8" + video_title: "PR-205: A Closer Look at Few Shot Classification" + number_of_likes: 26 + number_of_views: 2142 + published_date: { + seconds: 1573496397 + } + uploader: "Taeoh Kim" + } + } +} +pr_id_to_video: { + key: 206 + value: { + pr_id: 206 + papers: { + paper_id: "pointrcnn-3d-object-proposal-generation-and" + title: "PointRCNN: 3D Object Proposal Generation and Detection from Point Cloud" + arxiv_id: "1812.04244" + abstract: "In this paper, we propose PointRCNN for 3D object detection from raw point cloud. The whole framework is composed of two stages: stage-1 for the bottom-up 3D proposal generation and stage-2 for refining proposals in the canonical coordinates to obtain the final detection results. Instead of generating proposals from RGB image or projecting point cloud to bird's view or voxels as previous methods do, our stage-1 sub-network directly generates a small number of high-quality 3D proposals from point cloud in a bottom-up manner via segmenting the point cloud of the whole scene into foreground points and background. The stage-2 sub-network transforms the pooled points of each proposal to canonical coordinates to learn better local spatial features, which is combined with global semantic features of each point learned in stage-1 for accurate box refinement and confidence prediction. Extensive experiments on the 3D detection benchmark of KITTI dataset show that our proposed architecture outperforms state-of-the-art methods with remarkable margins by using only point cloud as input. The code is available at https://github.com/sshaoshuai/PointRCNN."
+ pub_date: { + seconds: 1544486400 + } + authors: "Shaoshuai Shi" + authors: "Xiaogang Wang" + authors: "Hongsheng Li" + repositories: { + url: "https://github.com/cxy1997/3D_adapt_auto_driving" + framework: FRAMEWORK_PYTORCH + number_of_stars: 51 + } + repositories: { + url: "https://github.com/direcf/pointrcnn_multiclass" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "PointRCNN_multiclass" + } + repositories: { + url: "https://github.com/jskim808/js_pointrcnn" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/carterprice2/Deep_Learning_project" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Modified 3D object detectors [F-ConvNet and PointRCNN] for Car detection on Kitti dataset" + } + repositories: { + is_official: true + url: "https://github.com/sshaoshuai/PointRCNN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1260 + description: "PointRCNN: 3D Object Proposal Generation and Detection from Point Cloud, CVPR 2019." + } + repositories: { + url: "https://github.com/ModelBunker/PointRCNN-PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "PointRCNN: 3D Object Proposal Generation and Detection from Point Cloud" + } + repositories: { + url: "https://github.com/sshaoshuai/Pointnet2.PyTorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 288 + description: "A faster implementation of PointNet++ based on PyTorch." + } + repositories: { + url: "https://github.com/sshaoshuai/PointCloudDet3D" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1777 + description: "OpenPCDet Toolbox for LiDAR-based 3D Object Detection." + } + repositories: { + url: "https://github.com/open-mmlab/OpenPCDet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1777 + description: "OpenPCDet Toolbox for LiDAR-based 3D Object Detection." + } + } + video: { + video_id: "sFN_EgCsNzM" + video_title: "PR-206: PointRCNN: 3D Object Proposal Generation and Detection from Point Cloud" + number_of_likes: 38 + number_of_views: 2735 + published_date: { + seconds: 1573396201 + } + uploader: "Doyup Lee" + } + } +} +pr_id_to_video: { + key: 207 + value: { + pr_id: 207 + papers: { + paper_id: "yolov3-an-incremental-improvement" + title: "YOLOv3: An Incremental Improvement" + arxiv_id: "1804.02767" + abstract: "We present some updates to YOLO! We made a bunch of little design changes to\nmake it better. We also trained this new network that's pretty swell. It's a\nlittle bigger than last time but more accurate. It's still fast though, don't\nworry. At 320x320 YOLOv3 runs in 22 ms at 28.2 mAP, as accurate as SSD but\nthree times faster. When we look at the old .5 IOU mAP detection metric YOLOv3\nis quite good. It achieves 57.9 mAP@50 in 51 ms on a Titan X, compared to 57.5\nmAP@50 in 198 ms by RetinaNet, similar performance but 3.8x faster. 
As always,\nall the code is online at https://pjreddie.com/yolo/" + pub_date: { + seconds: 1523145600 + } + authors: "Joseph Redmon" + authors: "Ali Farhadi" + repositories: { + url: "https://github.com/DevBruce/YOLOv3-TF2" + framework: FRAMEWORK_TENSORFLOW + description: "YOLOv3 implementation with TensorFlow2" + } + repositories: { + url: "https://github.com/Qengineering/YoloV3-ncnn-Jetson-Nano" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "YoloV3 for Jetson Nano" + } + repositories: { + url: "https://github.com/CRIGIM/darknet" + framework: FRAMEWORK_TENSORFLOW + description: "edited darknet" + } + repositories: { + url: "https://github.com/zgcr/simpleAICV-pytorch-ImageNet-COCO-training" + framework: FRAMEWORK_PYTORCH + number_of_stars: 157 + description: "Training examples and results for ImageNet(ILSVRC2012)/COCO2017/VOC2007+VOC2012 datasets.Include ResNet/DarkNet/RegNet/RetinaNet/FCOS/CenterNet/YOLO series." + } + repositories: { + url: "https://github.com/fredotran/traffic-sign-detector-yolov4" + framework: FRAMEWORK_OTHERS + number_of_stars: 4 + description: "This repository contains my upgraded version of using YoloV4 with OpenCV DNN to detect 4 classes of traffic road signs : traffic lights, speed limit signs, crosswalk and stop signs. " + } + repositories: { + url: "https://github.com/thinhhoang95/helipad-yolo" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/ntcuong777/aicc-lightnet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } + repositories: { + url: "https://github.com/nilesh0109/PedestrianTracking" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Yolo-v3 and SORT(kalman filter) based pedestrian detector and tracker" + } + repositories: { + url: "https://github.com/MINED30/Face_Mask_Detection_YOLO" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/albertsokol/yolov3-tf2" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "An implementation of YOLOv3 from scratch in Tensorflow 2.3 " + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "RetinaNet" + full_name: "RetinaNet" + description: "**RetinaNet** is a one-stage object detection model that utilizes a focal loss function to address class imbalance during training. Focal loss applies a modulating term to the cross entropy loss in order to focus learning on hard negative examples. RetinaNet is a single, unified network composed of a *backbone* network and two task-specific *subnetworks*. The backbone is responsible for computing a convolutional feature map over an entire input image and is an off-the-shelf convolutional network. The first subnet performs convolutional object classification on the backbone's output; the second subnet performs convolutional bounding box regression.
The two subnetworks feature a simple design that the authors propose specifically for one-stage, dense detection. \r\n\r\nWe can see the motivation for focal loss by comparing with two-stage object detectors. Here class imbalance is addressed by a two-stage cascade and sampling heuristics. The proposal stage (e.g., [Selective Search](https://paperswithcode.com/method/selective-search), [EdgeBoxes](https://paperswithcode.com/method/edgeboxes), [DeepMask](https://paperswithcode.com/method/deepmask), [RPN](https://paperswithcode.com/method/rpn)) rapidly narrows down the number of candidate object locations to a small number (e.g., 1-2k), filtering out most background samples. In the second classification stage, sampling heuristics, such as a fixed foreground-to-background ratio, or online hard example mining ([OHEM](https://paperswithcode.com/method/ohem)), are performed to maintain a\r\nmanageable balance between foreground and background.\r\n\r\nIn contrast, a one-stage detector must process a much larger set of candidate object locations regularly sampled across an image. To tackle this, RetinaNet uses a focal loss function, a dynamically scaled cross entropy loss, where the scaling factor decays to zero as confidence in the correct class increases. Intuitively, this scaling factor can automatically down-weight the contribution of easy examples during training and rapidly focus the model on hard examples. \r\n\r\nFormally, the Focal Loss adds a factor $(1 - p\\_{t})^\\gamma$ to the standard cross entropy criterion. Setting $\\gamma>0$ reduces the relative loss for well-classified examples ($p\\_{t}>.5$), putting more focus on hard, misclassified examples. Here there is tunable *focusing* parameter $\\gamma \\ge 0$. \r\n\r\n$$ {\\text{FL}(p\\_{t}) = - (1 - p\\_{t})^\\gamma \\log\\left(p\\_{t}\\right)} $$" + } + methods: { + name: "YOLOv3" + full_name: "YOLOv3" + description: "**YOLOv3** is a real-time, single-stage object detection model that builds on [YOLOv2](https://paperswithcode.com/method/yolov2) with several improvements. Improvements include the use of a new backbone network, Darknet-53 that utilises residual connections, or in the words of the author, \"those newfangled residual network stuff\", as well as some improvements to the bounding box prediction step, and use of three different scales from which to extract features (similar to an FPN)." + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. 
Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "FPN" + full_name: "Feature Pyramid Network" + description: "A **Feature Pyramid Network**, or **FPN**, is a feature extractor that takes a single-scale image of an arbitrary size as input, and outputs proportionally sized feature maps at multiple levels, in a fully convolutional fashion. This process is independent of the backbone convolutional architectures. It therefore acts as a generic solution for building feature pyramids inside deep convolutional networks to be used in tasks like object detection.\r\n\r\nThe construction of the pyramid involves a bottom-up pathway and a top-down pathway.\r\n\r\nThe bottom-up pathway is the feedforward computation of the backbone ConvNet, which computes a feature hierarchy consisting of feature maps at several scales with a scaling step of 2. For the feature\r\npyramid, one pyramid level is defined for each stage. The output of the last layer of each stage is used as a reference set of feature maps. For [ResNets](https://paperswithcode.com/method/resnet) we use the feature activations output by each stage’s last residual block. \r\n\r\nThe top-down pathway hallucinates higher resolution features by upsampling spatially coarser, but semantically stronger, feature maps from higher pyramid levels. These features are then enhanced with features from the bottom-up pathway via lateral connections. Each lateral connection merges feature maps of the same spatial size from the bottom-up pathway and the top-down pathway. The bottom-up feature map is of lower-level semantics, but its activations are more accurately localized as it was subsampled fewer times." + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. 
Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Darknet-53" + full_name: "Darknet-53" + description: "**Darknet-53** is a convolutional neural network that acts as a backbone for the [YOLOv3](https://paperswithcode.com/method/yolov3) object detection approach. The improvements upon its predecessor [Darknet-19](https://paperswithcode.com/method/darknet-19) include the use of residual connections, as well as more layers." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Fast-YOLOv3" + full_name: "Fast-YOLOv3" + } + } + video: { + video_id: "HMgcvgRrDcA" + video_title: "PR-207: YOLOv3: An Incremental Improvement" + number_of_likes: 116 + number_of_views: 7262 + published_date: { + seconds: 1574001134 + } + uploader: "JinWon Lee" } } }