Skip to content

Commit

Permalink
Many changes
Browse files Browse the repository at this point in the history
  • Loading branch information
cangermueller committed May 17, 2015
1 parent b4caba7 commit 9a39738
Show file tree
Hide file tree
Showing 80 changed files with 3,729 additions and 694 deletions.
11 changes: 5 additions & 6 deletions R/argparse.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@

library(argparse)

# NOTE(review): two parsers (`ap` and `p`) are defined back to back and `args`
# is assigned twice below. This looks like the removed and added sides of a
# diff shown without +/- markers -- confirm which version is current.

# Parser `ap`: one or more positional FILE arguments plus the --output-file,
# --verbose and --level options.
ap <- ArgumentParser(description='Concatenates several files')
ap$add_argument('files', metavar='FILE', nargs='+', help='Input files to be concatenated')
ap$add_argument('--output-file', dest='outfile', metavar='FILE', help='Output file')
ap$add_argument('--verbose', action='store_true', help='Be verbose')
# NOTE(review): the keyword is presumably `type`, not `dtype`, and the default
# is given as a string -- verify against the argparse documentation.
ap$add_argument('--level', dtype='integer', default='100', help='Some number')
# Parser `p`: one positional input file plus the -o/--out_file and --verbose options.
p <- ArgumentParser(description='Description')
p$add_argument('in_file', help='Input file')
p$add_argument('-o', '--out_file', help='Output file')
p$add_argument('--verbose', action='store_true', help='More detailed log messages')

# Parse the trailing command-line arguments with parser `ap` ...
args <- ap$parse_args(commandArgs(TRUE))
# ... then overwrite `args` with the result from parser `p`.
args <- p$parse_args(commandArgs(TRUE))
print(args)
16 changes: 16 additions & 0 deletions R/biomart.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
library(biomaRt)

# Connect to the 'ensembl' mart and select the 'mmusculus_gene_ensembl' dataset.
mart <- useMart('ensembl')
mart <- useDataset('mmusculus_gene_ensembl', mart=mart)
# Fields to retrieve for every record.
attr <- c('ensembl_gene_id', 'mgi_id', 'mgi_symbol')
# Spell out `attributes=`; `attr=` only worked through partial argument matching.
q <- getBM(attributes=attr, mart=mart)

getBM(attributes=, filters=, values=, mart=)
attributes=c('ensembl_gene_id') // retrieved fields
filters=c('ensembl_gene_id') // filter fields
values=c('ENSMUSG00000064336') // filter ensembl_gene_id == ENSMUSG00000064336

listMarts()
listDatasets()
listAttributes()
listFilters()
27 changes: 27 additions & 0 deletions R/brewer.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,30 @@
'RdYlGn'
colorRampPalette(pal)(200) // interpolate color palette to 200 colors



# Build an interpolated colour ramp from an RColorBrewer palette.
#
# Args:
#   pal: palette name; must be a row name of `brewer.pal.info`.
#   rev: if TRUE, return the colours in reversed order.
#   n:   number of colours to interpolate to (default 50, the value that
#        used to be hard-coded).
#
# Returns:
#   Character vector of `n` colours as produced by `colorRampPalette`.
brewer_cols <- function(pal='Spectral', rev=FALSE, n=50) {
  # Start from the largest palette size listed for `pal`.
  max_colors <- brewer.pal.info[pal, 'maxcolors']
  col <- colorRampPalette(brewer.pal(max_colors, pal))(n)
  if (rev) {
    # The argument `rev` is a logical, so this call still resolves to the
    # function base::rev().
    col <- rev(col)
  }
  col
}

## Sequential
# One colour ramp per palette name, in the listed order.
cols <- lapply(
  c('YlGnBu', 'YlGn', 'Reds', 'RdPu', 'Purples', 'PuBuGn', 'PuBu',
    'OrRd', 'Greens', 'GnBu', 'BuPu', 'BuGn', 'Blues'),
  brewer_cols
)
# Spectral goes last and is the only ramp that is reversed.
cols <- c(cols, list(brewer_cols('Spectral', rev=T)))
7 changes: 5 additions & 2 deletions R/clust.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@ use=
- method=single/complete/average
- hc$order // order of rows
* plot(hc, ...)
xlab=NA, ylab=NA, main=NA, sub=NA

xlab=NA
ylab=NA
main=NA
sub=NA

* members = cutree(hc, k=, h=)
- return cluster assignment for hierarchical clustering
- k: number of clusters
Expand Down
22 changes: 22 additions & 0 deletions R/cor.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
cor(M, method=, use=, ...)
* between columns of M
method=pearson, spearman, kendall
use=
* everything: NAs propagate -> affected columns of the result are NA
* all.obs: NA throws error
* complete.obs: remove rows that contain >0 NA (all samples)
* pairwise.complete.obs: remove only rows for pairwise comparison (different rows for different pairs)

# cor to dist
dist = 0.5 * (1 - abs(r))

# cor.test
cor.test(x, y, ...)
alternative=two.sided (default), less, greater

# weighted
library(weights)
weighted.mean(x) // default stats package
wtd.var(x, w=NULL)
wtd.cor(x, y, w=NULL)

23 changes: 17 additions & 6 deletions R/corrplot.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,27 @@
# corrplot
corrplot(matrix, method=, ...)
# methods
color

# parameters
is.corr=FALSE
order=c('original|hclust') // hclust correct?
method=color, num
is.corr=FALSE // if matrix is not correlation matrix (e.g. distance)
order='original'
hclust: hclust(as.dist(matrix)) // which is WRONG if matrix is correlation matrix
addrect=5 // rectangles, only if order='hclust'
tl.col='black' // color labels
col=color_palette // c(col, col) to obtain full range
cl.lim=c(0, 1)

# corrplot.mixed
corrplot.mixed(matrix, lower='number', upper='circle')
square, circle, ellipse, shade, color, number

# colors
col1 = colorRampPalette(c('white', 'blue'))
col=col1(200)

# If m is correlation matrix
corrplot(m)
plot(hclust(as.dist(1 - m)))

# if m is distance matrix
corrplot(m, is.corr=F)
plot(hclust(as.dist(m)))

124 changes: 100 additions & 24 deletions R/dplyr.txt
Original file line number Diff line number Diff line change
@@ -1,42 +1,106 @@
# misc
plyr: only data.frames
library(plyr) // load plyr first
library(dplyr)
options(...)
dplyr.width=200 // output width of tbl_df in characters
dplyr.print_min=10 // minimum # records to print; default=10

# tbl_df
* removes rownames -> use add_rownames('name')
tbl = tbl_df(df) // convert to tbl_df; nicer print
print(tbl, n=10)
tbl_df %>% print(n=10) // print first 10 rows

filter(df, c1==v1, c2==v2 | c2==v3, c3 %in% c(v1, v2)) // select rows; compare with ==, not =
# data_frame
* creates tbl_df
* no rownames
* special column names
data_frame(a=1:10, b=a*2, 'a+b'=a+b) %>% glimpse

# printing / inspecting
print(...)
n=10 // 10 lines
width=400 // width of 400 characters
width=Inf // all columns
glimpse // similar to str


# select
select(df, c1, c2) // select columns
c1:c10 // between
-(c1:c10) // except
starts_with('prefix')
ends_with('suffix')
contains('substr')
matches('pattern')
arrange(df, c1, desc(c2)) // sort by multiple columns
c1:c10 // between
-(c1:c10) // except
c1=mpg, c2=drat // select and rename
starts_with('prefix')
ends_with('suffix')
contains('substr')
matches('pattern')
one_of(c('c1', 'c2')) // select by vector

# filter
filter(c1==1, c2>2)
between(c1, min, max)
!is.na(c1)

# mutate, transmute
mutate(df, c = f(c1, c2)) // add or mutate columns
transmute(c=f(c1)) // like mutate, but only keeps new columns

# misc
filter(df, c1==v1, c2==v2 | c2==v3, c3 %in% c(v1, v2)) // select rows; compare with ==, not =
arrange(df, c1, desc(c2)) // sort by multiple columns
* df must be data.frame, not tbl_df!!
rename(df, new=old) // rename columns; unquoted
slice(start:end, c(i, j, k)) // select rows by index
add_rownames('c_rownames') // rownames -> column 'c_rownames'

# joins
left_join(df, by=)
right_join()
inner_join()
full_join() // full outer join; dplyr has no outer_join()
left_join(df, ...) // in a; fill NA
by=c('c1', 'c2')
by=c('left_c1'='right_c1') // different column names
right_join() // in b; fill NA
inner_join() // in a AND b
## filtering
* filter records in a; do not add additional columns
semi_join() // only show records from a that match b
anti_join() // only show records from a that do NOT match b


# group_by / summarise
group_by(columns) %>%
summarise(c=f(c), ...) // aggregate columns
group_by(columns) %>%
summarise_each(funs(f1, f2), columns) // apply same function(s) on column(s)
group_by(columns) %>%
summarise(n=n(), n_val=n_distinct(c)) // counts # records
group_by(columns) %>% // count # records
tally()
group_by(columns) %>%
summarise_each(funs(mean(., na.rm=T)), matches('pattern'))
group_by(columns) %>%
top_n(2, wt = column) // select top n records from group by column
summarise(c=f(c), ...) // aggregate columns
summarise_each(funs(f1, f2), columns) // apply same function(s) on column(s)
summarise_each(funs(mean(., na.rm=T)), matches('pattern'))
summarise(n=n(), n_val=n_distinct(c)) // counts # records

## functions
group_by(id) %>% n_groups() // # groups
group_by(id) %>% summarise(n=n()) // # records in group
group_by(.) %>% mutate(c=f(c)) // group-specific modifications
count(id) // # records in group
tally(var, ...) // == summarise(n=sum(var))
sort=T // sort by n
wt=col // weight records by col; summarize(n=sum(col))
count(...) // group_by(col) %>% tally
slice(start:end, c(i, j)) // select by index
slice(1) // select single group member
head(3) // first 3 records (does not always work!)
top_n(3, id) // select top n ordered by id in DESCENDING order
* returns MULTIPLE records, if id not unique!
sample_n(3) // sample three records of each group
distinct // unique members; more efficient than unique

## sorting
d %>% group_by(k) %>% arrange(...) // sort within groups
d %>% group_by(k) %>% summarize %>% ungroup %>% arrange(...) // sort globally
d %>% group_by(k1, k2) %>% summarize(cs=f(c)) %>% arrange(cs)
* summarize removes one group level (k2)
* sort by cs for each group in k1

## do
group_by(...) %>% do(f(.))
f is function that returns data.frame
. is current group as data.frame
data %>% group_by(id) %>% do(data.frame(c1=sd(.$c1), c2=sd(.$c2)))


# piping
Expand All @@ -53,6 +117,7 @@ lag(v, n = 1), lead(v, n = 1)
* v - lag(v) // change to previous value
sample_n(v, n, replace = F, weights = NULL) // sample n rows
sample_frac(v, frac, ...) // sample frac % of rows
ntile(v, n) // assign each v to one of n bins; same #points in each bin


# Database support
Expand All @@ -62,3 +127,14 @@ sample_frac(v, frac, ...) // sample frac % of rows
my_db = src_sqlite('my_db.sqlite3')
table = tbl(my_db, 'table')

# Standard evaluation function
function() // non-standard evaluation
function_() // standard evaluation

# Strings/character in function
fun_()
filter_('c1 > 10')
rename_('new'='old')
arrange_('c1', 'desc(c2)')


8 changes: 6 additions & 2 deletions R/general.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
options('name') // get
options(name=value) // set

# Exception handling
results <- tryCatch(f(), error = function(x) return (y))
* return f(), or y if f() fails


# Representing missing values
* NA
Expand Down Expand Up @@ -39,9 +43,9 @@ Sys.setenv(env)
* installed.packages()
* available.packages()
* remove.packages('pkg') // uninstall package
* update.packages() // update all packages in library
* update.packages(ask=F, checkBuilt=T) // update all packages in library
* ask=F // do not ask
* checkBuild=T // built with R 2.15; but now R 3.0 -> check
* checkBuilt=T // built with R 2.15; but now R 3.0 -> check
* chooseCRANmirror()
* R CMD INSTALL <pkg directory> // install source package manually
* library, require(name): load packages
Expand Down
Loading

0 comments on commit 9a39738

Please sign in to comment.