%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/
%% Created for Renato Vimieiro at 2017-03-07 18:41:38 -0300
%% Saved with string encoding Unicode (UTF-8)
@misc{wall2014,
Author = {Matthew Wall},
Date-Added = {2017-03-07 21:40:00 +0000},
Date-Modified = {2017-03-07 21:41:36 +0000},
Howpublished = {\url{http://www.bbc.com/news/business-26383058}},
Keywords = {Big Data},
Month = {March},
Title = {Big Data: Are you ready for blast-off?},
Year = {2014}}
@incollection{ramakrishnan09,
Author = {Naren Ramakrishnan and Mohammed Zaki},
Booktitle = {Biological Data Mining},
Chapter = {22},
Date-Added = {2012-07-16 12:43:47 +1000},
Date-Modified = {2012-07-16 12:52:27 +1000},
Editor = {Jake Chen and Stefano Lonardi},
Keywords = {redescription mining, minimal generators},
Pages = {561--586},
Publisher = {CRC Press},
Series = {Chapman \& Hall/CRC Data Mining and Knowledge Discovery Series},
Title = {{Redescription Mining and Applications in Bioinformatics}},
Year = {2009}}
@phdthesis{kumar07,
Author = {D Kumar},
Date-Added = {2012-07-16 12:32:41 +1000},
Date-Modified = {2012-07-16 12:33:57 +1000},
Keywords = {redescription mining},
School = {Virginia Tech},
Title = {Redescription mining: algorithms and applications in {B}ioinformatics},
Year = {2007}}
@inproceedings{zaki05,
Acmid = {1081912},
Address = {New York, NY, USA},
Author = {Zaki, Mohammed J. and Ramakrishnan, Naren},
Booktitle = {Proceedings of the Eleventh ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
Date-Added = {2012-07-16 12:25:37 +1000},
Date-Modified = {2012-07-16 12:27:33 +1000},
Doi = {10.1145/1081870.1081912},
Isbn = {1-59593-135-X},
Keywords = {closed itemsets, data mining, minimal generators, redescription mining},
Location = {Chicago, Illinois, USA},
Numpages = {10},
Pages = {364--373},
Publisher = {ACM},
Series = {KDD '05},
Title = {Reasoning about sets using redescription mining},
Url = {http://doi.acm.org/10.1145/1081870.1081912},
Year = {2005},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1081870.1081912},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1081870.1081912}}
@inproceedings{parida05,
Acmid = {1619467},
Author = {Parida, Laxmi and Ramakrishnan, Naren},
Booktitle = {Proceedings of the 20th National Conference on Artificial Intelligence},
Date-Added = {2012-07-16 12:23:44 +1000},
Date-Modified = {2012-07-16 12:24:44 +1000},
Isbn = {1-57735-236-X},
Keywords = {redescription mining},
Location = {Pittsburgh, Pennsylvania},
Numpages = {8},
Pages = {837--844},
Publisher = {AAAI Press},
Series = {AAAI'05},
Title = {Redescription mining: structure theory and algorithms},
Url = {http://dl.acm.org/citation.cfm?id=1619410.1619467},
Volume = {2},
Year = {2005},
Bdsk-Url-1 = {http://dl.acm.org/citation.cfm?id=1619410.1619467}}
@article{webb07,
Abstract = {Pattern discovery techniques, such as association rule discovery, explore large search spaces of potential patterns to find those that satisfy some user-specified constraints. Due to the large number of patterns considered, they suffer from an extreme risk of type-1 error, that is, of finding patterns that appear due to chance alone to satisfy the constraints on the sample data. This paper proposes techniques to overcome this problem by applying well-established statistical practices. These allow the user to enforce a strict upper limit on the risk of experimentwise error. Empirical studies demonstrate that standard pattern discovery techniques can discover numerous spurious patterns when applied to random data and when applied to real-world data result in large numbers of patterns that are rejected when subjected to sound statistical evaluation. They also reveal that a number of pragmatic choices about how such tests are performed can greatly affect their power.},
Affiliation = {Monash University Faculty of Information Technology PO Box 75 Clayton Vic. 3800 Australia},
Author = {Webb, Geoffrey},
Date-Added = {2012-07-12 11:24:46 +1000},
Date-Modified = {2012-07-12 12:02:31 +1000},
Issn = {0885-6125},
Issue = {1},
Journal = {Machine Learning},
Keyword = {Computer Science},
Keywords = {interestingness measure, frequent itemset mining, association rules},
Pages = {1--33},
Publisher = {Springer Netherlands},
Title = {Discovering Significant Patterns},
Url = {http://dx.doi.org/10.1007/s10994-007-5006-x},
Volume = {68},
Year = {2007},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/s10994-007-5006-x}}
@inproceedings{hamalainen10,
Abstract = {Statistical dependency analysis is the basis of all empirical science. A commonly occurring problem is to find the most significant dependency rules, which describe either positive or negative dependencies between categorical attributes. For example, in medical science one is interested in genetic factors, which can either predispose or prevent diseases. The requirement of statistical significance is essential, because the discoveries should hold also in the future data. Typically, the significance is estimated either by Fisher's exact test or the $\chi^2$-measure. The problem is computationally very difficult, because the number of all possible dependency rules increases exponentially with the number of attributes. As a solution, different kinds of restrictions and heuristics have been applied, but a general, scalable search method has been missing. In this paper, we introduce an efficient algorithm for searching for the top-K globally optimal dependency rules using Fisher's exact test as a measure function. The rules can express either positive or negative dependencies between a set of positive attributes and a single consequent attribute. The algorithm is based on an application of the branch-and-bound search strategy, supplemented by several pruning properties. Especially, we prove a new lower-bound for the Fisher's p, and introduce a new effective pruning principle. The general search algorithm is applicable to other goodness measures, like the $\chi^2$-measure, as well. According to our experiments on classical benchmark data, the algorithm is well scalable and can efficiently handle even dense and high dimensional data sets. In addition, the quality of rules is significantly better than with the $\chi^2$-measure using the same search algorithm.},
Author = {H\"am\"al\"ainen, Wilhelmiina},
Booktitle = {Proceedings of the IEEE 10th International Conference on Data Mining (ICDM 2010)},
Date-Added = {2012-07-12 11:18:51 +1000},
Date-Modified = {2012-07-12 11:21:00 +1000},
Doi = {10.1109/ICDM.2010.143},
Issn = {1550-4786},
Keywords = {Fisher exact test; data mining; p-value; frequent pattern mining, interestingness measure},
Pages = {196--205},
Title = {Efficient Discovery of the Top-K Optimal Dependency Rules with Fisher's Exact Test of Significance},
Year = {2010},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICDM.2010.143}}
@article{hamalainen12,
Abstract = {Statistical dependency analysis is the basis of all empirical science. A commonly occurring problem is to find the most significant dependency rules, which describe either positive or negative dependencies between categorical attributes. In medical science, for example, one is interested in genetic factors, which can either predispose or prevent diseases. The requirement of statistical significance is essential, because the discoveries should hold also in future data. Typically, the significance is estimated either by Fisher's exact test or the $\chi^2$-measure. The problem is computationally very difficult, because the number of all possible dependency rules increases exponentially with the number of attributes. As a solution, different kinds of restrictions and heuristics have been applied, but a general, scalable search method has been missing. In this paper, we introduce an efficient algorithm, called Kingfisher, for searching for the best non-redundant dependency rules with statistical significance measures. The rules can express either positive or negative dependencies between a set of positive attributes and a single consequent attribute. The algorithm itself is independent from the used goodness measure, but we concentrate on Fisher's exact test and the $\chi^2$-measure. The algorithm is based on an application of the branch-and-bound search strategy, supplemented by several pruning properties. Especially, we prove a new lower bound for Fisher's p and introduce a new effective pruning principle. According to our experiments on classical benchmark data, the algorithm is well scalable and can efficiently handle even dense and high-dimensional data sets. An interesting observation was that Fisher's exact test did not only produce more reliable rules than the $\chi^2$-measure, but it also performed the search much faster.},
Affiliation = {Department of Biosciences, University of Eastern Finland, Joensuu, Finland},
Author = {H\"am\"al\"ainen, Wilhelmiina},
Date-Added = {2012-07-12 11:15:10 +1000},
Date-Modified = {2012-07-12 11:55:08 +1000},
Issn = {0219-1377},
Issue = {2},
Journal = {Knowledge and Information Systems},
Keyword = {Computer Science},
Keywords = {frequent itemset mining, p-value, interestingness measure},
Pages = {383--414},
Publisher = {Springer London},
Title = {Kingfisher: an efficient algorithm for searching for both positive and negative dependency rules with statistical significance measures},
Url = {http://dx.doi.org/10.1007/s10115-011-0432-2},
Volume = {32},
Year = {2012},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/s10115-011-0432-2}}
@inproceedings{tan02,
Acmid = {775053},
Address = {New York, NY, USA},
Author = {Tan, Pang-Ning and Kumar, Vipin and Srivastava, Jaideep},
Booktitle = {Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
Date-Added = {2012-07-12 10:58:36 +1000},
Date-Modified = {2012-07-12 10:59:53 +1000},
Doi = {10.1145/775047.775053},
Isbn = {1-58113-567-X},
Keywords = {association rules, interestingness measure, frequent itemset mining},
Location = {Edmonton, Alberta, Canada},
Numpages = {10},
Pages = {32--41},
Publisher = {ACM},
Series = {KDD '02},
Title = {Selecting the right interestingness measure for association patterns},
Url = {http://doi.acm.org/10.1145/775047.775053},
Year = {2002},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/775047.775053},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/775047.775053}}
@inproceedings{li08,
Address = {New York, NY, USA},
Author = {Li, Haoyuan and Wang, Yi and Zhang, Dong and Zhang, Ming and Chang, Edward Y.},
Booktitle = {Proceedings of the 2008 ACM Conference on Recommender Systems},
Date-Added = {2012-07-12 10:20:50 +1000},
Date-Modified = {2012-07-12 10:22:02 +1000},
Doi = {10.1145/1454008.1454027},
Isbn = {978-1-60558-093-7},
Keywords = {data mining, frequent itemset mining, parallel frequent pattern mining},
Location = {Lausanne, Switzerland},
Numpages = {8},
Pages = {107--114},
Publisher = {ACM},
Series = {RecSys '08},
Title = {{PFP}: Parallel {FP}-growth for query recommendation},
Url = {http://doi.acm.org/10.1145/1454008.1454027},
Year = {2008},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1454008.1454027},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1454008.1454027}}
@article{agarwal01,
Abstract = {In this paper we propose algorithms for generation of frequent item sets by successive construction of the nodes of a lexicographic tree of item sets. We discuss different strategies in generation and traversal of the lexicographic tree such as breadth-first search, depth-first search, or a combination of the two. These techniques provide different trade-offs in terms of the I/O, memory, and computational time requirements. We use the hierarchical structure of the lexicographic tree to successively project transactions at each node of the lexicographic tree and use matrix counting on this reduced set of transactions for finding frequent item sets. We tested our algorithm on both real and synthetic data. We provide an implementation of the tree projection method which is up to one order of magnitude faster than other recent techniques in the literature. The algorithm has a well-structured data access pattern which provides data locality and reuse of data for multiple levels of the cache. We also discuss methods for parallelization of the TreeProjection algorithm.},
Author = {Ramesh C. Agarwal and Charu C. Aggarwal and V.V.V. Prasad},
Date-Added = {2012-07-11 19:07:55 +1000},
Date-Modified = {2012-07-11 19:08:29 +1000},
Doi = {10.1006/jpdc.2000.1693},
Issn = {0743-7315},
Journal = {Journal of Parallel and Distributed Computing},
Keywords = {frequent itemset mining, data mining},
Number = {3},
Pages = {350--371},
Title = {A Tree Projection Algorithm for Generation of Frequent Item Sets},
Url = {http://www.sciencedirect.com/science/article/pii/S0743731500916939},
Volume = {61},
Year = {2001},
Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/pii/S0743731500916939},
Bdsk-Url-2 = {http://dx.doi.org/10.1006/jpdc.2000.1693}}
@inproceedings{orlando02,
Abstract = { The performance of an algorithm that mines frequent sets from transactional databases may severely depend on the specific features of the data being analyzed. Moreover, some architectural characteristics of the computational platform used - e.g. the available main memory - can dramatically change its runtime behavior. In this paper we present DCI (Direct Count Intersect), an efficient algorithm for discovering frequent sets from large databases. Due to the multiple heuristics strategies adopted, DCI can adapt its behavior not only to the features of the specific computing platform, but also to the features of the dataset being mined, so that it results very effective in mining both short and long patterns from sparse and dense datasets. Finally we also discuss the parallelization strategies adopted in the design of ParDCI, a distributed and multi-threaded implementation of DCI.},
Author = {Orlando, S. and Palmerini, P. and Perego, R. and Silvestri, F.},
Booktitle = {Proceedings of the IEEE International Conference on Data Mining (ICDM 2002)},
Date-Added = {2012-07-11 19:00:00 +1000},
Date-Modified = {2012-07-11 19:03:47 +1000},
Doi = {10.1109/ICDM.2002.1183921},
Keywords = {frequent sets; data mining; frequent itemset mining},
Pages = {338--345},
Title = {Adaptive and resource-aware mining of frequent sets},
Year = {2002},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICDM.2002.1183921}}
@inproceedings{teodoro10,
Abstract = {Frequent itemset mining (FIM) is a core operation for several data mining applications as association rules computation, correlations, document classification, and many others, which has been extensively studied over the last decades. Moreover, databases are becoming increasingly larger, thus requiring a higher computing power to mine them in reasonable time. At the same time, the advances in high performance computing platforms are transforming them into hierarchical parallel environments equipped with multi-core processors and many-core accelerators, such as GPUs. Thus, fully exploiting these systems to perform FIM tasks poses as a challenging and critical problem that we address in this paper. We present efficient multi-core and GPU accelerated parallelizations of the Tree Projection, one of the most competitive FIM algorithms. The experimental results show that our Tree Projection implementation scales almost linearly in a CPU shared-memory environment after careful optimizations, while the GPU versions are up to 173 times faster than the standard CPU version.},
Author = {Teodoro, G. and Mariano, N. and Meira, W. and Ferreira, R.},
Booktitle = {22nd International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)},
Date-Added = {2012-07-11 18:50:26 +1000},
Date-Modified = {2012-07-12 12:01:52 +1000},
Doi = {10.1109/SBAC-PAD.2010.15},
Issn = {1550-6533},
Keywords = {association rules; frequent itemset mining; GPU;data mining},
Pages = {47--54},
Title = {Tree Projection-Based Frequent Itemset Mining on Multicore CPUs and GPUs},
Year = {2010},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/SBAC-PAD.2010.15}}
@inproceedings{silvestri12,
Address = {Los Alamitos, CA, USA},
Author = {Claudio Silvestri and Salvatore Orlando},
Booktitle = {Euromicro Conference on Parallel, Distributed, and Network-Based Processing},
Date-Added = {2012-07-11 18:36:54 +1000},
Date-Modified = {2012-07-11 18:38:59 +1000},
Doi = {10.1109/PDP.2012.94},
Issn = {1066-6192},
Keywords = {GPU, closed itemset mining, frequent closed pattern mining, frequent itemset mining},
Pages = {416--425},
Publisher = {IEEE Computer Society},
Title = {{gpuDCI: Exploiting GPUs in Frequent Itemset Mining}},
Year = {2012},
Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/PDP.2012.94}}
@techreport{fang08,
Author = {Wenbin Fang and Ka Keung Lau and Mian Lu and Xiangye Xiao and Chi Kit Lam and Philip Yang Yang and Bingsheng He and Qiong Luo and Pedro V. Sander and Ke Yang},
Date-Added = {2012-07-11 18:24:03 +1000},
Date-Modified = {2012-07-11 18:27:32 +1000},
Institution = {Hong Kong University of Science and Technology},
Keywords = {GPU, frequent itemset mining, frequent pattern mining},
Number = {HKUST-CS08-07},
Title = {Parallel data mining on graphics processors},
Year = {2008}}
@inproceedings{fang09,
Address = {New York, NY, USA},
Author = {Fang, Wenbin and Lu, Mian and Xiao, Xiangye and He, Bingsheng and Luo, Qiong},
Booktitle = {Proceedings of the Fifth International Workshop on Data Management on New Hardware},
Date-Added = {2012-07-11 18:21:01 +1000},
Date-Modified = {2012-07-11 18:22:31 +1000},
Doi = {10.1145/1565694.1565702},
Isbn = {978-1-60558-701-1},
Keywords = {GPU, frequent itemset mining, frequent pattern mining},
Location = {Providence, Rhode Island},
Numpages = {9},
Pages = {34--42},
Publisher = {ACM},
Series = {DaMoN '09},
Title = {Frequent itemset mining on graphics processors},
Url = {http://doi.acm.org/10.1145/1565694.1565702},
Year = {2009},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1565694.1565702},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1565694.1565702}}
@article{berretta2007,
Author = {Berretta, R. and Mendes, A. and Moscato, P.},
Date-Added = {2012-06-12 15:07:07 +1000},
Date-Modified = {2012-06-12 15:09:11 +1000},
Journal = {Journal of Research and Practice in Information Technology},
Keywords = {abk feature selection},
Number = {4},
Pages = {287--299},
Publisher = {Australian Computer Society},
Title = {Selection of discriminative genes in microarray experiments using mathematical programming},
Volume = {39},
Year = {2007}}
@incollection{cotta04,
Abstract = {We deal with two important problems in pattern recognition that arise in the analysis of large datasets. While most feature subset selection methods use statistical techniques to preprocess the labeled datasets, these methods are generally not linked with the combinatorial properties of the final solutions. We prove that it is NP-hard to obtain an appropriate set of thresholds that will transform a given dataset into a binary instance of a robust feature subset selection problem. We address this problem using an evolutionary algorithm that learns the appropriate value of the thresholds. The empirical evaluation shows that robust subset of genes can be obtained. This evaluation is done using real data corresponding to the gene expression of lymphomas.},
Author = {Cotta, Carlos and Sloper, Christian and Moscato, Pablo},
Booktitle = {Applications of Evolutionary Computing},
Date-Added = {2012-06-12 14:57:53 +1000},
Date-Modified = {2012-06-12 14:59:07 +1000},
Editor = {Raidl, G\"unther and Cagnoni, Stefano and Branke, J\"urgen and Corne, David and Drechsler, Rolf and Jin, Yaochu and Johnson, Colin and Machado, Penousal and Marchiori, Elena and Rothlauf, Franz and Smith, George and Squillero, Giovanni},
Isbn = {978-3-540-21378-9},
Keywords = {abk feature selection},
Pages = {21--30},
Publisher = {Springer Berlin / Heidelberg},
Series = {Lecture Notes in Computer Science},
Title = {Evolutionary Search of Thresholds for Robust Feature Set Selection: Application to the Analysis of Microarray Data},
Url = {http://dx.doi.org/10.1007/978-3-540-24653-4_3},
Volume = {3005},
Year = {2004},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/978-3-540-24653-4_3}}
@inproceedings{moscato2005,
Author = {Moscato, P. and Mathieson, L. and Mendes, A. and Berretta, R.},
Booktitle = {Proceedings of the Twenty-eighth Australasian conference on Computer Science-Volume 38},
Date-Added = {2012-06-12 14:53:51 +1000},
Date-Modified = {2012-06-12 14:55:09 +1000},
Keywords = {abk feature selection},
Organization = {Australian Computer Society},
Pages = {371--379},
Title = {The Electronic Primaries: Predicting the {US} presidency using feature selection with safe data reduction},
Year = {2005}}
@incollection{ravetti09,
Abstract = {In this chapter we present a method based on the $(\alpha,\beta)$-$k$-feature set problem for identifying relevant attributes in high-dimensional datasets for classification purposes. We present a case-study of biomedical interest. Using the gene expression of thousands of genes, we show that the method can give a reduced set that can identify samples as belonging to prostate cancer tumors or not. We thus address the need of finding novel methods that can deal with classification problems that involve feature selection from several thousand features, while we only have on the order of one hundred samples. The methodology appears to be very robust in this prostate cancer case study. It has led to the identification of a set of differentially expressed genes that are highly predictive of the cells transition to a more malignant type, thus departing from the profile which is characteristic of its originating tissue. Although the method is presented with a particular bioinformatics application in mind, it can clearly be used in other domains. A biological analysis illustrates on the relevance of the genes found, and links to the most current developments in prostate cancer biomarker studies.},
Affiliation = {Medicine, The University of Newcastle, Australian Research Council Centre of Excellence in Bioinformatics Centre for Bioinformatics, Biomarker Discovery and Information-Based Callaghan NSW 2308 Australia},
Author = {Ravetti, Mart{\'\i}n and Berretta, Regina and Moscato, Pablo},
Booktitle = {Foundations of Computational Intelligence Volume 5},
Date-Added = {2012-06-12 14:47:40 +1000},
Date-Modified = {2012-06-12 15:14:48 +1000},
Editor = {Abraham, Ajith and Hassanien, Aboul-Ella and Sn\'a\v{s}el, V\'aclav},
Isbn = {978-3-642-01535-9},
Keywords = {abk feature selection},
Pages = {149--175},
Publisher = {Springer Berlin / Heidelberg},
Series = {Studies in Computational Intelligence},
Title = {{Novel Biomarkers for Prostate Cancer Revealed by $\left(\alpha,\beta\right)$-$k$-Feature Sets}},
Url = {http://dx.doi.org/10.1007/978-3-642-01536-6_7},
Volume = {205},
Year = {2009},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/978-3-642-01536-6_7}}
@article{ravetti08,
Abstract = {Background: Alzheimer's disease (AD) is a progressive brain disease with a huge cost to human lives. The impact of the disease is also a growing concern for the governments of developing countries, in particular due to the increasingly high number of elderly citizens at risk. Alzheimer's is the most common form of dementia, a common term for memory loss and other cognitive impairments. There is no current cure for AD, but there are drug and non-drug based approaches for its treatment. In general the drug-treatments are directed at slowing the progression of symptoms. They have proved to be effective in a large group of patients but success is directly correlated with identifying the disease carriers at its early stages. This justifies the need for timely and accurate forms of diagnosis via molecular means. We report here a 5-protein biomarker molecular signature that achieves, on average, a 96% total accuracy in predicting clinical AD. The signature is composed of the abundances of IL-1α, IL-3, EGF, TNF-α and G-CSF.
Methodology/Principal Findings: Our results are based on a recent molecular dataset that has attracted worldwide attention. Our paper illustrates that improved results can be obtained with the abundance of only five proteins. Our methodology consisted of the application of an integrative data analysis method. This four step process included: a) abundance quantization, b) feature selection, c) literature analysis, d) selection of a classifier algorithm which is independent of the feature selection process. These steps were performed without using any sample of the test datasets. For the first two steps, we used the application of Fayyad and Irani's discretization algorithm for selection and quantization, which in turn creates an instance of the (alpha-beta)-k-Feature Set problem; a numerical solution of this problem led to the selection of only 10 proteins.
Conclusions/Significance: The previous study has provided an extremely useful dataset for the identification of AD biomarkers. However, our subsequent analysis also revealed several important facts worth reporting: 1. A 5-protein signature (which is a subset of the 18-protein signature of Ray et al.) has the same overall performance (when using the same classifier). 2. Using more than 20 different classifiers available in the widely-used Weka software package, our 5-protein signature has, on average, a smaller prediction error indicating the independence of the classifier and the robustness of this set of biomarkers (i.e. 96% accuracy when predicting AD against non-demented control). 3. Using very simple classifiers, like Simple Logistic or Logistic Model Trees, we have achieved the following results on 92 samples: 100 percent success to predict Alzheimer's Disease and 92 percent to predict Non Demented Control on the AD dataset.},
Author = {G\'omez Ravetti, Mart\'in AND Moscato, Pablo},
Date-Added = {2012-06-12 14:41:31 +1000},
Date-Modified = {2012-07-12 11:54:20 +1000},
Doi = {10.1371/journal.pone.0003111},
Journal = {PLoS ONE},
Keywords = {Alzheimer disease, abk feature selection},
Number = {9},
Pages = {e3111},
Publisher = {Public Library of Science},
Title = {Identification of a 5-Protein Biomarker Molecular Signature for Predicting Alzheimer's Disease},
Url = {http://dx.doi.org/10.1371%2Fjournal.pone.0003111},
Volume = {3},
Year = {2008},
Bdsk-Url-1 = {http://dx.doi.org/10.1371/journal.pone.0003111}}
@book{hey09,
Abstract = {{A collection of essays expanding on the vision of pioneering computer scientist Jim Gray for a new, fourth paradigm of discovery based on data-intensive science.}},
Author = {Hey, Anthony J. G. and Tansley, Stewart and Tolle, Kristin M.},
Booktitle = {The Fourth Paradigm: Data-Intensive Scientific Discovery},
Citeulike-Article-Id = {6347440},
Citeulike-Linkout-0 = {http://research.microsoft.com/en-us/collaboration/fourthparadigm/},
Citeulike-Linkout-1 = {http://www.worldcat.org/isbn/9780982544204},
Date-Added = {2012-06-11 15:33:50 +1000},
Date-Modified = {2012-06-11 15:34:55 +1000},
Howpublished = {http://research.microsoft.com/en-us/collaboration/fourthparadigm/},
Isbn = {9780982544204},
Keywords = {scientific methodology},
Posted-At = {2009-12-21 11:47:52},
Priority = {2},
Publisher = {Microsoft Research},
Title = {{The Fourth Paradigm: Data-Intensive Scientific Discovery}},
Url = {http://research.microsoft.com/en-us/collaboration/fourthparadigm/},
Year = {2009},
Bdsk-Url-1 = {http://research.microsoft.com/en-us/collaboration/fourthparadigm/}}
@misc{lohr12,
Author = {Steve Lohr},
Date-Added = {2012-06-11 14:46:13 +1000},
Date-Modified = {2012-07-12 11:56:52 +1000},
Howpublished = {{\url{http://www.nytimes.com/2012/02/12/sunday-review/big-datas-impact-in-the-world.html?_r=1}}},
Keywords = {big data, digital revolution},
Lastchecked = {June 11, 2012},
Note = {{(Accessed on June 11, 2012)}},
Title = {The Age of Big Data},
Url = {http://www.nytimes.com/2012/02/12/sunday-review/big-datas-impact-in-the-world.html?_r=1},
Year = {2012},
Bdsk-Url-1 = {http://www.nytimes.com/2012/02/12/sunday-review/big-datas-impact-in-the-world.html?_r=1}}
@misc{insel2012,
Author = {Thomas Insel},
Date-Added = {2012-06-11 14:34:56 +1000},
Date-Modified = {2012-06-11 15:49:06 +1000},
Howpublished = {\url{http://www.nimh.nih.gov/about/director/2012/an-emerging-era-of-big-data.shtml}},
Keywords = {big data, digital revolution},
Lastchecked = {June 11, 2012},
Month = {February},
Note = {{(Accessed on June 11, 2012)}},
Title = {An Emerging Era of Big Data},
Url = {http://www.nimh.nih.gov/about/director/2012/an-emerging-era-of-big-data.shtml},
Year = {2012},
Bdsk-Url-1 = {http://www.nimh.nih.gov/about/director/2012/an-emerging-era-of-big-data.shtml}}
@article{hebert2007,
Address = {Amsterdam, The Netherlands, The Netherlands},
Author = {H\'{e}bert, C\'{e}line and Bretto, Alain and Cr\'{e}milleux, Bruno},
Date-Added = {2012-06-07 12:32:54 +1000},
Date-Modified = {2012-06-07 12:38:00 +1000},
Issn = {0169-2968},
Journal = {Fundamenta Informaticae},
Keywords = {hypergraph dualization, minimal transversals, hypergraph, hitting set, formal concept analysis},
Number = {4},
Numpages = {19},
Pages = {415--433},
Publisher = {IOS Press},
Title = {A Data Mining Formalization to Improve Hypergraph Minimal Transversal Computation},
Url = {http://dl.acm.org/citation.cfm?id=1366548.1366552},
Volume = {80},
Year = {2007},
Bdsk-Url-1 = {http://dl.acm.org/citation.cfm?id=1366548.1366552}}
@article{murakami11,
Author = {Keisuke Murakami and Takeaki Uno},
Date-Added = {2012-06-07 11:31:14 +1000},
Date-Modified = {2012-06-07 11:33:45 +1000},
Journal = {CoRR},
Keywords = {hypergraph, minimal transversals, hitting set, emerging patterns, maximal frequent itemsets, hypergraph dualization},
Title = {Efficient Algorithms for Dualizing Large-Scale Hypergraphs},
Url = {http://arxiv.org/abs/1102.3813},
Volume = {abs/1102.3813},
Year = {2011},
Bdsk-Url-1 = {http://arxiv.org/abs/1102.3813}}
@incollection{ken03,
Abstract = {In this paper, we give a new algorithm for enumerating all maximal frequent sets using dualization. Frequent sets in transaction data has been used for computing association rules. Maximal frequent sets are important in representing frequent sets in a compact form, thus many researchers have proposed algorithms for enumerating maximal frequent sets. Among these algorithms, some researchers proposed algorithms for enumerating both maximal frequent sets and minimal infrequent sets in a primal-dual way by using a computation of the minimal transversal for a hypergraph, or in other words, hypergraph dualization. We give an improvement for this kind of algorithms in terms of the number of queries of frequency and the space complexity. Our algorithm checks each minimal infrequent set just once, while the existing algorithms check more than once, possibly so many times. Our algorithm does not store the minimal infrequent sets in memory, while the existing algorithms have to store them. The main idea of the improvement is that minimal infrequent sets computed from maximal frequent sets by dualization is still minimal infrequent even if we add a set to the current maximal frequent sets. We analyze the query complexity and the space complexity of our algorithm theoretically, and experimentally evaluate the algorithm to show that the computation time on average is in the order of the multiplication of the number of maximal frequent sets and the number of minimal infrequent sets.},
Author = {Satoh, Ken and Uno, Takeaki},
Booktitle = {Discovery Science},
Date-Added = {2012-06-07 11:26:26 +1000},
Date-Modified = {2012-06-07 11:28:07 +1000},
Editor = {Grieser, Gunter and Tanaka, Yuzuru and Yamamoto, Akihiro},
Isbn = {978-3-540-20293-6},
Keywords = {maximal frequent itemsets, Maximal frequent patterns, maximal itemset mining, hypergraph, minimal transversals, hitting set},
Pages = {256--268},
Publisher = {Springer Berlin / Heidelberg},
Series = {Lecture Notes in Computer Science},
Title = {Enumerating Maximal Frequent Sets Using Irredundant Dualization},
Url = {http://dx.doi.org/10.1007/978-3-540-39644-4_22},
Volume = {2843},
Year = {2003},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/978-3-540-39644-4_22}}
@inproceedings{bailey03,
Abstract = { Computing the minimal transversals of a hypergraph is an important problem in computer science that has significant applications in data mining. We present a new algorithm for computing hypergraph transversals and highlight their close connection to an important class of patterns known as emerging patterns. We evaluate our technique on a number of large datasets and show that it outperforms previous approaches by a factor of 9-29 times.},
Author = {Bailey, J. and Manoukian, T. and Ramamohanarao, Kotagiri},
Booktitle = {Third IEEE International Conference on Data Mining},
Date-Added = {2012-06-06 12:38:01 +1000},
Date-Modified = {2012-06-06 12:39:21 +1000},
Doi = {10.1109/ICDM.2003.1250958},
Keywords = {data mining; emerging patterns; minimal transversals; hypergraph; hitting set},
Pages = {485--488},
Title = {A fast algorithm for computing hypergraph transversals and its application in mining emerging patterns},
Year = {2003},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICDM.2003.1250958}}
@article{vazquez09,
Abstract = {BACKGROUND:Identifying effective drug combinations that significantly improve over single agents is a challenging problem. Pairwise combinations already represent a huge screening effort. Beyond two drug combinations the task seems unfeasible.RESULTS:In this work we introduce a method to uncover drug combinations with a putative effective response when presented to a heterogeneous population of malignant agents (strains), such as cancer cell lines or viruses. Using data quantifying the effect of single drugs over several individual strains, we search for minimal drug combinations that successfully target all strains. We show that the latter problem can be mapped to a minimal hitting set problem in mathematics. We illustrate this approach using data for the NCI60 panel of tumor derived cell lines, uncovering 14 anticancer drug combinations.CONCLUSION:The drug-response graph and the associated minimal hitting set method can be used to uncover effective drug combinations in anticancer drug screens and drug development programs targeting heterogeneous populations of infectious agents such as HIV.},
Author = {Vazquez, Alexei},
Date-Added = {2012-06-06 12:28:25 +1000},
Date-Modified = {2012-06-06 12:29:08 +1000},
Doi = {10.1186/1752-0509-3-81},
Issn = {1752-0509},
Journal = {BMC Systems Biology},
Keywords = {hitting set, minimal transversals, hypergraph},
Number = {1},
Pages = {81},
Pubmedid = {19660129},
Title = {Optimal drug combinations and minimal hitting sets},
Url = {http://www.biomedcentral.com/1752-0509/3/81},
Volume = {3},
Year = {2009},
Bdsk-Url-1 = {http://www.biomedcentral.com/1752-0509/3/81},
Bdsk-Url-2 = {http://dx.doi.org/10.1186/1752-0509-3-81}}
@article{mellor2010,
Abstract = {Therapies consisting of a combination of agents are an attractive proposition, especially in the context of diseases such as cancer, which can manifest with a variety of tumor types in a single case. However uncovering usable drug combinations is expensive both financially and temporally. By employing computational methods to identify candidate combinations with a greater likelihood of success we can avoid these problems, even when the amount of data is prohibitively large. Hitting Set is a combinatorial problem that has useful application across many fields, however as it is NP-complete it is traditionally considered hard to solve exactly. We introduce a more general version of the problem, $(\alpha,\beta,d)$-Hitting Set, which allows more precise control over how and what the hitting set targets. Employing the framework of Parameterized Complexity we show that despite being NP-complete, the $(\alpha,\beta,d)$-Hitting Set problem is fixed-parameter tractable with a kernel of size $O(\alpha d k^{d})$ when we parameterize by the size $k$ of the hitting set and the maximum number $\alpha$ of the minimum number of hits, and taking the maximum degree $d$ of the target sets as a constant. We demonstrate the application of this problem to multiple drug selection for cancer therapy, showing the flexibility of the problem in tailoring such drug sets. The fixed-parameter tractability result indicates that for low values of the parameters the problem can be solved quickly using exact methods. We also demonstrate that the problem is indeed practical, with computation times on the order of 5 seconds, as compared to previous Hitting Set applications using the same dataset which exhibited times on the order of 1 day, even with relatively relaxed notions for what constitutes a low value for the parameters. Furthermore the existence of a kernelization for $(\alpha,\beta,d)$-Hitting Set indicates that the problem is readily scalable to large datasets.},
Author = {Mellor, Drew AND Prieto, Elena AND Mathieson, Luke AND Moscato, Pablo},
Date-Added = {2012-06-06 12:22:12 +1000},
Date-Modified = {2012-07-12 11:57:30 +1000},
Doi = {10.1371/journal.pone.0013055},
Journal = {PLoS ONE},
Keywords = {hitting set, minimal transversals, hypergraph},
Number = {10},
Pages = {e13055},
Publisher = {Public Library of Science},
Title = {{A Kernelisation Approach for Multiple \emph{d}-Hitting Set and Its Application in Optimal Multi-Drug Therapeutic Combinations}},
Url = {http://dx.doi.org/10.1371%2Fjournal.pone.0013055},
Volume = {5},
Year = {2010},
Bdsk-Url-1 = {http://dx.doi.org/10.1371/journal.pone.0013055}}
@book{berge73,
Author = {Claude Berge},
Date-Added = {2012-06-05 12:06:50 +1000},
Date-Modified = {2012-06-05 12:12:22 +1000},
Keywords = {hypergraph, graph, minimal transversals},
Publisher = {Elsevier},
Series = {North-Holland Mathematical Library},
Title = {Graphs and Hypergraphs},
Url = {http://www.sciencedirect.com/science/article/pii/S0924650909703265},
Volume = {6},
Year = {1973},
Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/pii/S0924650909703265}}
@phdthesis{li01_thesis,
Author = {Jinyan Li},
Date-Added = {2012-06-04 17:34:27 +1000},
Date-Modified = {2012-06-04 17:37:14 +1000},
Keywords = {emerging patterns, jumping emerging patterns, associative classification, classification},
Read = {Yes},
School = {University of Melbourne},
Title = {Mining emerging patterns to construct accurate and efficient classifiers},
Year = {2001}}
@article{vimieiro12,
Abstract = {Disjunctive minimal generators were proposed by Zhao, Zaki, and Ramakrishnan (2006). They defined disjunctive closed itemsets and disjunctive minimal generators through the disjunctive support function. We prove that the disjunctive support function is compatible with the closure operator presented by Zhao et al. (2006). Such compatibility allows us to adapt the original version of the Titanic algorithm, proposed by Stumme, Taouil, Bastide, Pasquier, and Lakhal (2002) to mine iceberg concept lattices and closed itemsets, to mine disjunctive minimal generators. We present TitanicOR, a new breadth-first algorithm for mining disjunctive minimal generators. We evaluate the performance of our method with both synthetic and real data sets and compare TitanicOR's performance with the performance of BLOSOM (Zhao et al., 2006), the state of the art method and sole algorithm available prior to TitanicOR for mining disjunctive minimal generators. We show that TitanicOR's breadth-first approach is up to two orders of magnitude faster than BLOSOM's depth-first approach.},
Author = {Vimieiro, Renato and Moscato, Pablo},
Doi = {10.1016/j.eswa.2012.01.141},
Issn = {0957-4174},
Journal = {Expert Systems with Applications},
Keywords = {BLOSOM,Boolean expressions,Closed itemsets,Disjunctions,Frequent pattern mining,Minimal generators,TitanicOR},
Number = {9},
Pages = {8228--8238},
Title = {{Mining disjunctive minimal generators with TitanicOR}},
Url = {http://www.sciencedirect.com/science/article/pii/S0957417412001613},
Volume = {39},
Year = {2012},
Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/pii/S0957417412001613},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/j.eswa.2012.01.141}}
@article{vimieiro04,
Author = {Vimieiro, Renato and Z\'{a}rate, LE and Silva, Jpd},
Journal = {Intelligence and Soft},
Keywords = {formal concept anal,neural networks,rule extraction},
Pages = {334--339},
Title = {{Rule extraction from trained neural networks via formal concept analysis}},
Url = {http://www.actapress.com/Abstract.aspx?paperId=18596},
Year = {2004},
Bdsk-Url-1 = {http://www.actapress.com/Abstract.aspx?paperId=18596}}
@inproceedings{loekito06,
Address = {New York, NY, USA},
Author = {Loekito, Elsa and Bailey, James},
Booktitle = {Proceedings of the 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
Doi = {10.1145/1150402.1150438},
Isbn = {1-59593-339-5},
Keywords = {classification, data mining, disjunctive emerging patterns, disjunctive patterns, emerging patterns, zero-suppressed binary decision diagrams,contrast patterns},
Pages = {307--316},
Publisher = {ACM},
Series = {KDD '06},
Title = {{Fast mining of high dimensional expressive contrast patterns using zero-suppressed binary decision diagrams}},
Url = {http://doi.acm.org/10.1145/1150402.1150438},
Year = {2006},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1150402.1150438}}
@inproceedings{loekito09,
Address = {Berlin, Heidelberg},
Author = {Loekito, Elsa and Bailey, James},
Booktitle = {Proceedings of the 13th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining},
Doi = {10.1007/978-3-642-01307-2\_44},
Isbn = {978-3-642-01306-5},
Keywords = {classification, data mining, disjunctive emerging patterns, disjunctive patterns, emerging patterns, quantitative association rules,expressive contrasts},
Pages = {483--490},
Publisher = {Springer-Verlag},
Series = {PAKDD '09},
Title = {{Using Highly Expressive Contrast Patterns for Classification - Is It Worthwhile?}},
Url = {http://dx.doi.org/10.1007/978-3-642-01307-2\_44},
Year = {2009},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/978-3-642-01307-2%5C_44}}
@inproceedings{wang03,
Address = {New York, NY, USA},
Author = {Wang, Jianyong and Han, Jiawei and Pei, Jian},
Booktitle = {Proceedings of the Ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
Doi = {10.1145/956750.956779},
Isbn = {1-58113-737-0},
Keywords = {frequent closed itemsets, mining methods and algorithms,association rules},
Pages = {236--245},
Publisher = {ACM},
Series = {KDD '03},
Title = {{{CLOSET+}: searching for the best strategies for mining frequent closed itemsets}},
Url = {http://doi.acm.org/10.1145/956750.956779},
Year = {2003},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/956750.956779}}
@article{grahne05,
Abstract = {Efficient algorithms for mining frequent itemsets are crucial for
mining association rules as well as for many other data mining tasks. Methods
for mining frequent itemsets have been implemented using a prefix-tree
structure, known as an FP-tree, for storing compressed information about
frequent itemsets. Numerous experimental results have demonstrated that these
algorithms perform extremely well. In this paper, we present a novel FP-array
technique that greatly reduces the need to traverse FP-trees, thus obtaining
significantly improved performance for FP-tree-based algorithms. Our technique
works especially well for sparse data sets. Furthermore, we present new
algorithms for mining all, maximal, and closed frequent itemsets. Our algorithms
use the FP-tree data structure in combination with the FP-array technique
efficiently and incorporate various optimization techniques. We also present
experimental results comparing our methods with existing algorithms. The results
show that our methods are the fastest for many cases. Even though the algorithms
consume much memory when the data sets are sparse, they are still the fastest
ones when the minimum support is low. Moreover, they are always among the
fastest algorithms and consume less memory than other methods when the data sets
are dense.},
Author = {Grahne, G and Zhu, J},
Doi = {10.1109/TKDE.2005.166},
Issn = {1041-4347},
Journal = {IEEE Transactions on Knowledge and Data Engineering},
Keywords = {association rules; data mining; frequent itemset m},
Number = {10},
Pages = {1347--1362},
Title = {{Fast algorithms for frequent itemset mining using {FP}-trees}},
Volume = {17},
Year = {2005},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/TKDE.2005.166}}
@inproceedings{cong05,
Address = {New York, NY, USA},
Author = {Cong, Gao and Tan, Kian-Lee and Tung, Anthony K H and Xu, Xin},
Booktitle = {Proceedings of the 2005 ACM SIGMOD International Conference on Management of Data},
Doi = {10.1145/1066157.1066234},
Isbn = {1-59593-060-4},
Pages = {670--681},
Publisher = {ACM},
Series = {SIGMOD '05},
Title = {{Mining top-K covering rule groups for gene expression data}},
Url = {http://doi.acm.org/10.1145/1066157.1066234},
Year = {2005},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1066157.1066234}}
@inproceedings{cheng07,
Abstract = {The application of frequent patterns in classification
appeared in sporadic studies and achieved initial success in the
classification of relational data, text documents and graphs. In this
paper, we conduct a systematic exploration of frequent pattern-based
classification, and provide solid reasons supporting this methodology.
It was well known that feature combinations (patterns) could capture
more underlying semantics than single features. However, inclusion of
infrequent patterns may not significantly improve the accuracy due to
their limited predictive power. By building a connection between pattern
frequency and discriminative measures such as information gain and
Fisher score, we develop a strategy to set minimum support in frequent
pattern mining for generating useful patterns. Based on this strategy,
coupled with a proposed feature selection algorithm, discriminative
frequent patterns can be generated for building high quality
classifiers. We demonstrate that the frequent pattern-based
classification framework can achieve good scalability and high accuracy
in classifying large datasets. Empirical studies indicate that
significant improvement in classification accuracy is achieved (up to
12\% in UCI datasets) using the so-selected discriminative frequent
patterns.},
Author = {Cheng, Hong and Yan, Xifeng and Han, Jiawei and Hsu, Chih-Wei},
Booktitle = {Proceedings of the 23rd IEEE International Conference on Data Engineering, ICDE 2007},
Doi = {10.1109/ICDE.2007.367917},
Keywords = {discriminative pattern mining;frequent pattern min},
Pages = {716--725},
Title = {{Discriminative Frequent Pattern Analysis for Effective Classification}},
Year = {2007},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICDE.2007.367917}}
@article{wang06,
Abstract = {Many studies have shown that rule-based classifiers perform well in
classifying categorical and sparse high-dimensional databases. However, a
fundamental limitation with many rule-based classifiers is that they find the
rules by employing various heuristic methods to prune the search space and
select the rules based on the sequential database covering paradigm. As a
result, the final set of rules that they use may not be the globally best rules
for some instances in the training database. To make matters worse, these
algorithms fail to fully exploit some more effective search space pruning
methods in order to scale to large databases. In this paper, we present a new
classifier, HARMONY, which directly mines the final set of classification rules.
HARMONY uses an instance-centric rule-generation approach and it can assure
that, for each training instance, one of the highest-confidence rules covering
this instance is included in the final rule set, which helps in improving the
overall accuracy of the classifier. By introducing several novel search
strategies and pruning methods into the rule discovery process, HARMONY also has
high efficiency and good scalability. Our thorough performance study with some
large text and categorical databases has shown that HARMONY outperforms many
well-known classifiers in terms of both accuracy and computational efficiency
and scales well with regard to the database size},
Author = {Wang, Jianyong and Karypis, George},
Doi = {10.1109/TKDE.2006.179},
Issn = {1041-4347},
Journal = {IEEE Transactions on Knowledge and Data Engineering},
Keywords = {HARMONY;data mining;FP-Growth;associative classifi},
Number = {11},
Pages = {1497--1511},
Title = {{On Mining Instance-Centric Classification Rules}},
Volume = {18},
Year = {2006},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/TKDE.2006.179}}
@inproceedings{li01_CMAR,
Abstract = {Previous studies propose that associative classification has high
classification accuracy and strong flexibility at handling unstructured data.
However, it still suffers from the huge set of mined rules and sometimes biased
classification or overfitting since the classification is based on only a single
high-confidence rule. The authors propose a new associative classification
method, CMAR, i.e., Classification based on Multiple Association Rules. The
method extends an efficient frequent pattern mining method, FP-growth,
constructs a class distribution-associated FP-tree, and mines large databases
efficiently. Moreover, it applies a CR-tree structure to store and retrieve
mined association rules efficiently, and prunes rules effectively based on
confidence, correlation and database coverage. The classification is performed
based on a weighted chi;2 analysis using multiple strong association rules. Our
extensive experiments on 26 databases from the UCI machine learning database
repository show that CMAR is consistent, highly effective at classification of
various kinds of databases and has better average classification accuracy in
comparison with CBA and C4.5. Moreover, our performance study shows that the
method is highly efficient and scalable in comparison with other reported
associative classification methods},
Author = {Li, Wenmin and Han, Jiawei and Pei, Jian},
Booktitle = {Proceedings of the IEEE International Conference on Data Mining (ICDM 2001)},
Doi = {10.1109/ICDM.2001.989541},
Keywords = {CMAR;FP-growth;associative classification;frequent},
Pages = {369--376},
Title = {{{CMAR}: accurate and efficient classification based on multiple class-association rules}},
Year = {2001},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICDM.2001.989541}}
@book{quinlan1993,
Address = {San Francisco, CA, USA},
Author = {Quinlan, J Ross},
Isbn = {1-55860-238-0},
Keywords = {data mining,classification},
Publisher = {Morgan Kaufmann Publishers Inc.},
Title = {{C4.5: programs for machine learning}},
Year = {1993}}
@incollection{dong99_CAEP,
Abstract = {Emerging patterns (EPs) are itemsets whose supports change
significantly from one dataset to another; they were recently proposed to
capture multi-attribute contrasts between data classes, or trends over time. In
this paper we propose a new classifier, CAEP, using the following main ideas
based on EPs: (i) Each EP can sharply differentiate the class membership of a
(possibly small) fraction of instances containing the EP, due to the big
difference between its supports in the opposing classes; we define the
differentiating power of the EP in terms of the supports and their ratio, on
instances containing the EP. (ii) For each instance t , by aggregating the
differentiating power of a fixed, automatically selected set of EPs, a score is
obtained for each class. The scores for all classes are normalized and the
largest score determines t 's class. CAEP is suitable for many applications,
even those with large volumes of high (e.g. 45) dimensional data; it does not
depend on dimension reduction on data; and it is usually equally accurate on all
classes even if their populations are unbalanced. Experiments show that CAEP has
consistent good predictive accuracy, and it almost always outperforms C4.5 and
CBA. By using efficient, border-based algorithms (developed elsewhere) to
discover EPs, CAEP scales up on data volume and dimensionality. Observing that
accuracy on the whole dataset is too coarse description of classifiers, we also
used a more accurate measure, sensitivity and precision , to better characterize
the performance of classifiers. CAEP is also very good under this measure.},
Author = {Dong, Guozhu and Zhang, Xiuzhen and Wong, Limsoon and Li, Jinyan},
Booktitle = {Discovery Science},
Editor = {Arikawa, Setsuo and Furukawa, Koichi},
Isbn = {978-3-540-66713-1},
Keywords = {associative classification, emerging patterns, frequent itemset mining, frequent pattern mining, jumping emerging patterns, maximal frequent itemsets,data mining},
Pages = {737},
Publisher = {Springer Berlin / Heidelberg},
Series = {Lecture Notes in Computer Science},
Title = {{{CAEP}: Classification by Aggregating Emerging Patterns}},
Url = {http://dx.doi.org/10.1007/3-540-46846-3_4},
Volume = {1721},
Year = {1999},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/3-540-46846-3_4}}
@article{li01,
Abstract = {Classification aims to discover a model from training data that
can be used to predict the class of test instances. In this paper, we propose
the use of jumping emerging patterns (JEPs) as the basis for a new classifier
called the JEP-Classifier . Each JEP can capture some crucial difference
between a pair of datasets. Then, aggregating all JEPs of large supports can
produce a more potent classification power. Procedurally, the JEP-Classifier
learns the pair-wise features (sets of JEPs) contained in the training data,
and uses the collective impacts contributed by the most expressive pair-wise
features to determine the class labels of the test data. Using only the most
expressive JEPs in the JEP-Classifier strengthens its resistance to noise in
the training data, and reduces its complexity (as there are usually a very
large number of JEPs). We use two algorithms for constructing the
JEP-Classifier which are both scalable and efficient. These algorithms make
use of the border representation to efficiently store and manipulate JEPs. We
also present experimental results which show that the JEP-Classifier achieves
much higher testing accuracies than the association-based classifier of Liu
et al. (1998), which was reported to outperform C4.5 in general.},
Author = {Li, Jinyan and Dong, Guozhu and Ramamohanarao, Kotagiri},
Issn = {0219-1377},
Journal = {Knowledge and Information Systems},
Keywords = {associative classification, emerging patterns, frequent itemset mining, frequent pattern mining, jumping emerging patterns, maximal frequent itemsets,data mining},
Number = {2},
Pages = {131--145},
Publisher = {Springer London},
Title = {{Making Use of the Most Expressive Jumping Emerging Patterns for Classification}},
Url = {http://dx.doi.org/10.1007/PL00011662},
Volume = {3},
Year = {2001},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/PL00011662}}
@inproceedings{liu98,
Author = {Liu, Bing and Hsu, Wynne and Ma, Yiming},
Booktitle = {Proceedings of the Fourth International Conference on Knowledge Discovery and Data Mining (KDD-98)},
Editor = {Agrawal, Rakesh and Stolorz, Paul E and Piatetsky-Shapiro, Gregory},
Keywords = {CBA, discriminative pattern mining, frequent itemset mining; data mining, frequent pattern mining,associative classification},
Publisher = {AAAI Press},
Title = {{Integrating Classification and Association Rule Mining}},
Year = {1998}}
@article{guizhen06,
Abstract = {In this paper we study the complexity-theoretic aspects of mining maximal frequent patterns,
from the perspective of counting the number of all distinct solutions.
We present the first formal proof that the problem of counting the number of maximal frequent itemsets
in a database of transactions, given an arbitrary support threshold, is \#P-complete, thereby providing
theoretical evidence that the problem of mining maximal frequent itemsets is NP-hard. We also extend our
complexity analysis to other similar data mining problems that deal with complex data structures, such as
sequences, trees, and graphs. We investigate several variants of these mining problems in which the
patterns of interest are subsequences, subtrees, or subgraphs, and show that the associated problems of
counting the number of maximal frequent patterns are all either \#P-complete or \#P-hard.},
Author = {Yang, Guizhen},
Doi = {10.1016/j.tcs.2006.05.029},
Issn = {0304-3975},
Journal = {Theoretical Computer Science},
Keywords = {\#P-complete,Complexity,Data mining,Maximal frequent patterns},
Number = {1-3},
Pages = {63--85},
Title = {{Computational aspects of mining maximal frequent patterns}},
Url = {http://www.sciencedirect.com/science/article/pii/S0304397506003355},
Volume = {362},
Year = {2006},
Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/pii/S0304397506003355},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/j.tcs.2006.05.029}}
@article{piatetsky-shapiro03,
Address = {New York, NY, USA},
Author = {Piatetsky-Shapiro, Gregory and Tamayo, Pablo},
Date-Modified = {2012-07-12 11:58:30 +1000},
Doi = {10.1145/980972.980974},
Issn = {1931-0145},
Journal = {SIGKDD Explorations Newsletter},
Number = {2},
Pages = {1--5},
Publisher = {ACM},
Title = {{Microarray data mining: facing the challenges}},
Url = {http://doi.acm.org/10.1145/980972.980974},
Volume = {5},
Year = {2003},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/980972.980974}}
@article{dries10,
Abstract = {We adapt Mitchell's version space algorithm for mining k-CNF formulas.
Advantages of this algorithm are that it runs in a single pass over the data, is
conceptually simple, can be used for missing value prediction, and has
interesting theoretical properties, while an empirical evaluation on
classification tasks yields competitive predictive results.},
Author = {Dries, A and {De Raedt}, L and Nijssen, S},
Doi = {10.1109/TKDE.2009.152},
Issn = {1041-4347},
Journal = {IEEE Transactions on Knowledge and Data Engineering},
Keywords = {Boolean expressions;conjunctive normal form;data mining},
Month = may,
Number = {5},
Pages = {743--748},
Title = {{Mining Predictive k-{CNF} Expressions}},
Volume = {22},
Year = {2010},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/TKDE.2009.152}}
@inproceedings{mannila_toivonen96,
Author = {Mannila, Heikki and Toivonen, Hannu},
Booktitle = {Proceedings of the Second International Conference on Knowledge Discovery and Data Mining},
Editor = {Simoudis, Evangelos and Han, Jia W and Fayyad, Usama},
Keywords = {disjunctive pattern mining, frequent itemset mining, frequent pattern mining,data mining},
Pages = {189--194},
Publisher = {AAAI Press},
Title = {{Multiple Uses of Frequent Sets and Condensed Representations: Extended Abstract}},
Year = {1996}}
@article{hughes00,
Abstract = {Ascertaining the impact of uncharacterized perturbations on
the cell is a fundamental problem in biology. Here, we describe how a
single assay can be used to monitor hundreds of different cellular
functions simultaneously. We constructed a reference database or
"compendium" of expression profiles corresponding to 300 diverse
mutations and chemical treatments in S. cerevisiae, and we show that the
cellular pathways affected can be determined by pattern matching, even
among very subtle profiles. The utility of this approach is validated by
examining profiles caused by deletions of uncharacterized genes: we
identify and experimentally confirm that eight uncharacterized open
reading frames encode proteins required for sterol metabolism, cell wall
function, mitochondrial respiration, or protein synthesis. We also show
that the compendium can be used to characterize pharmacological
perturbations by identifying a novel target of the commonly used drug
dyclonine.},
Address = {Rosetta Inpharmatics, Inc., Kirkland, Washington 98034, USA.},
Author = {Hughes, T R and Marton, M J and Jones, A R and Roberts, C J and Stoughton, R and Armour, C D and Bennett, H A and Coffey, E and Dai, H and He, Y D and Kidd, M J and King, A M and Meyer, M R and Slade, D and Lum, P Y and Stepaniants, S B and Shoemaker, D D and Gachotte, D and Chakraburtty, K and Simon, J and Bard, M and Friend, S H},
Doi = {10.1016/S0092-8674(00)00015-5},
Issn = {0092-8674},
Journal = {Cell},
Keywords = {networks, yeastDataset,datasets},
Number = {1},
Pages = {109--126},
Title = {{Functional discovery via a compendium of expression profiles}},
Url = {http://www.cell.com/retrieve/pii/S0092867400000155},
Volume = {102},
Year = {2000},
Bdsk-Url-1 = {http://www.cell.com/retrieve/pii/S0092867400000155},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/S0092-8674(00)00015-5}}
@inproceedings{cong04b,
Abstract = { Microarray data typically contains a large number of
columns and a small number of rows, which poses a great challenge for
existing frequent (closed) pattern mining algorithms that discover
patterns in item enumeration space. In this paper, we propose two
algorithms that explore the row enumeration space to mine frequent
closed patterns. Several experiments on real-life gene expression data
show that the algorithms are faster than existing algorithms, including
CLOSET, CHARM, CLOSET+ and CARPENTER.},
Author = {Cong, Gao and Tan, Kian-Lee and Tung, A K H and Pan, Feng},
Booktitle = {ICDM '04: Proceedings of the Fourth IEEE International Conference on Data Mining},
Doi = {10.1109/ICDM.2004.10070},
Keywords = {frequent closed pattern mining; item enumeration space},
Pages = {363--366},
Title = {{Mining frequent closed patterns in microarray data}},
Year = {2004},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICDM.2004.10070}}
@article{mcintosh07,
Abstract = {We present an association rule mining method for mining high
confidence rules, which describe interesting gene relationships from
microarray datasets. Microarray datasets typically contain an order of
magnitude more genes than experiments, rendering many data mining
methods impractical as they are optimised for sparse datasets. A new
family of row-enumeration rule mining algorithms has emerged to
facilitate mining in dense datasets. These algorithms rely on pruning
infrequent relationships to reduce the search space by using the support
measure. This major shortcoming results in the pruning of many
potentially interesting rules with low support but high confidence. We
propose a new row-enumeration rule mining method, MaxConf, to mine high
confidence rules from microarray data. MaxConf is a support-free
algorithm which directly uses the confidence measure to effectively
prune the search space. Experiments on three microarray datasets show
that MaxConf outperforms support-based rule mining with respect to
scalability and rule extraction. Furthermore, detailed biological
analyses demonstrate the effectiveness of our approach -- the rules
discovered by MaxConf are substantially more interesting and meaningful
compared with support-based methods.},
Address = {Los Alamitos, CA, USA},
Author = {McIntosh, Tara and Chawla, Sanjay},
Date-Modified = {2012-07-12 11:57:09 +1000},
Doi = {10.1109/tcbb.2007.1050},
Issn = {1545-5963},
Journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
Keywords = {association rules, frequent itemset mining, frequent pattern mining, high confidence rule mining, microarray analysis, vertical itemset mining,Data mining},
Number = {4},
Pages = {611--623},
Publisher = {IEEE Computer Society Press},
Title = {{High Confidence Rule Mining for Microarray Analysis}},
Url = {http://dx.doi.org/10.1109/tcbb.2007.1050},
Volume = {4},
Year = {2007},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/tcbb.2007.1050}}
@article{delgado01,