forked from blei-lab/hdp
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprint.topics.R
executable file
·51 lines (43 loc) · 1.23 KB
/
print.topics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env Rscript
# Write the top-ranked vocabulary words of every topic to a text file.
#
# Args:
#   words.fn:  path to a whitespace-delimited numeric table without a header
#              row; each row is one topic, each column one vocabulary term.
#   vocab.fn:  path to a text file with one vocabulary term per line; line j
#              names column j of the table.
#   topics.fn: path of the output file (overwritten if it exists).
#   top.n:     number of words to list per topic (default 5). Capped at the
#              number of columns in the table so no row is padded with NA.
#
# Output layout: a header line with the topic numbers 1..K, followed by top.n
# lines where line i holds the i-th ranked word of each topic. Every field is
# right-aligned in a 50-character column.
#
# Returns NULL invisibly; the function is called for its file side effect.
print.topics <- function(words.fn, vocab.fn, topics.fn, top.n=5)
{
    words <- as.matrix(read.table(words.fn, header=FALSE))
    vocab <- readLines(vocab.fn, warn=FALSE)

    top.n <- as.integer(top.n)
    if (length(top.n) != 1 || is.na(top.n) || top.n < 1)
    {
        stop("top.n must be a positive integer", call.=FALSE)
    }

    num.topics <- nrow(words)
    num.terms <- ncol(words)
    if (length(vocab) < num.terms)
    {
        warning("vocabulary has ", length(vocab), " entries but the table has ",
                num.terms, " columns; unnamed terms are printed as NA",
                call.=FALSE)
    }
    # Never ask for more words than there are terms.
    top.n <- min(top.n, num.terms)

    # Rank each row on its raw values: dividing by the row sum would not
    # change the order, and skipping it keeps all-zero rows well defined.
    topics <- vapply(seq_len(num.topics), function(k)
    {
        ranked <- order(words[k,], decreasing=TRUE)
        vocab[ranked[seq_len(top.n)]]
    }, character(top.n))
    # vapply() returns a plain vector when top.n is 1; force the
    # (rank x topic) matrix shape in every case.
    topics <- matrix(topics, nrow=top.n, ncol=num.topics)

    head <- paste0(sprintf("%50d", seq_len(num.topics)), collapse="")
    body <- vapply(seq_len(top.n), function(i)
    {
        paste0(sprintf("%50s", topics[i,]), collapse="")
    }, character(1))

    writeLines(c(head, body), topics.fn)
    invisible(NULL)
}
# Command-line entry point.
# Usage: ./print.topics word.counts.file vocab.file topics.file [top.n]
# The first three arguments are required; top.n defaults to 5.
args <- commandArgs(TRUE)
if (length(args) < 3)
{
    cat("./print.topics word.counts.file vocab.file topics.file [top.n, 5 default]\n")
    stop("too few parameters")
}
words.fn <- args[1]
vocab.fn <- args[2]
topics.fn <- args[3]
top.n <- 5
if (length(args) >= 4)
{
    # as.integer() turns non-numeric text into NA (with a warning); reject
    # that, and non-positive counts, here with a clear message.
    top.n <- suppressWarnings(as.integer(args[4]))
    if (is.na(top.n) || top.n < 1)
    {
        stop("top.n must be a positive integer, got '", args[4], "'",
             call.=FALSE)
    }
}
print.topics(words.fn, vocab.fn, topics.fn, top.n)