【问题标题】:Ignoring errors and continuing to run when executing R script from command line Linux从命令行Linux执行R脚本时忽略错误并继续运行
【发布时间】:2013-07-25 04:48:52
【问题描述】:

我在 Linux VPS 上运行下面的 R 脚本,它经常报错,导致脚本中断。我不确定该如何在代码中处理这些错误,想知道有没有办法让脚本在出错后继续运行。这些错误通常是访问 `results` 表时出现的下标越界(subscript out of bounds)错误。将代码直接粘贴到 R……

library(RMySQL)
library(twitteR)
library(plyr)
library(stringr)
library(sentiment)

# Timestamps of this run: Date as "YYYY-MM-DD HH:MM", Time as "HH:MM"
Date <- format(Sys.time(), "%Y-%m-%d %H:%M")
Time <- format(Sys.time(), "%H:%M")

# Connection to the database holding the raw tweets
tweets.con <- dbConnect(
  MySQL(),
  user = "xxxxxxxxxxxx",
  password = "xxxxxxxxxxxx",
  dbname = "xxxxxxxxxx",
  host = "xxxxxxxxxxxxxxxxxxxx.com"
)

# Text of the tweets in the selected time window.
# NOTE(review): the window lies 49-60 minutes AFTER NOW(); presumably this
# compensates for a clock/time-zone offset between the servers - confirm.
Feel <- dbGetQuery(
  tweets.con,
  "select `tweet_text` from `tweets` where `created_at` BETWEEN timestamp(DATE_ADD(NOW(), INTERVAL 49 MINUTE)) AND timestamp(DATE_ADD(NOW(), INTERVAL 60 MINUTE))"
)

# Number of cells in the query result; shown on the console, then kept
# under both names used by the rest of the script
n <- length(as.matrix(Feel))
n
Total_Count <- n

# Connection to the results database and the most recent row of stored
# proportions (used later as the baseline for the percentage changes)
results.con <- dbConnect(
  MySQL(),
  user = "xxxxxxxxxxx",
  password = "xxxxxxxxxxxxxxxxxx",
  dbname = "xxxxxxxxxxxxxx",
  host = "xxxxxxxxxxxxxxxxxx"
)
last.results.alt <- dbGetQuery(
  results.con,
  "select `Neg_Prop_Alt`,`Neu_Prop_Alt`,`Pos_Prop_Alt`,`neg5_Prop`,`neg4_Prop`,`neg3_Prop`,`neg2_Prop`,`neg1_Prop`,`zero_Prop`,`pos1_Prop`,`pos2_Prop`,`pos3_Prop`,`pos4_Prop`,`pos5_Prop` from `results_10m_alt` ORDER BY Date DESC LIMIT 1"
)

# function score.sentiment
#
# Scores every sentence as (number of words found in pos.words) minus
# (number of words found in neg.words), after cleaning the text.
#
# Parameters
#   sentences: vector of text to score
#   pos.words: vector of words of positive sentiment
#   neg.words: vector of words of negative sentiment
#   .progress: passed to laply() to control the progress bar
#
# Returns a data frame with one row per sentence: `text` (the sentences as
# supplied, not the cleaned text) and `score`.
score.sentiment <- function(sentences, pos.words, neg.words, .progress='none')
{
   # tolower() that yields NA instead of stopping when the conversion
   # raises an error
   tryTolower <- function(x)
   {
      tryCatch(tolower(x), error=function(e) NA_character_)
   }

   # create simple array of scores with laply
   scores <- laply(sentences,
   function(sentence, pos.words, neg.words)
   {
      # remove retweet entities
      sentence <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", sentence)
      # remove at people
      sentence <- gsub("@\\w+", "", sentence)
      # remove punctuation
      sentence <- gsub("[[:punct:]]", "", sentence)
      # remove numbers
      sentence <- gsub("[[:digit:]]", "", sentence)
      # remove control characters
      sentence <- gsub("[[:cntrl:]]", "", sentence)
      # remove html links; punctuation and digits are already gone at this
      # point, so a URL has been squashed into one "http..." token
      sentence <- gsub("http\\w+", "", sentence)
      # collapse runs of spaces/tabs into ONE space. Replacing them with ""
      # glued neighbouring words together (e.g. after an @mention was
      # removed), so those words could no longer match a dictionary entry.
      sentence <- gsub("[ \t]{2,}", " ", sentence)
      # trim leading and trailing whitespace
      sentence <- gsub("^\\s+|\\s+$", "", sentence)

      # lower-case; NA when tolower() fails
      sentence <- vapply(sentence, tryTolower, character(1), USE.NAMES=FALSE)

      # split sentence into words with str_split (stringr package)
      words <- unlist(str_split(sentence, "\\s+"))

      # compare words to the dictionaries of positive & negative terms;
      # match() gives a position or NA, we only need TRUE/FALSE
      pos.matches <- !is.na(match(words, pos.words))
      neg.matches <- !is.na(match(words, neg.words))

      # final score
      sum(pos.matches) - sum(neg.matches)
   }, pos.words, neg.words, .progress=.progress )

   # data frame with scores for each sentence
   data.frame(text=sentences, score=scores)
}

# import positive and negative words
pos <- readLines("/home/jgraab/R/scripts/positive_words.txt")
neg <- readLines("/home/jgraab/R/scripts/negative_words.txt")

Feel_txt <- sapply(Feel, function(x) gettext(x))

scores.df <- score.sentiment(Feel_txt, pos, neg, .progress='text')

# Count the tweets per score over the FIXED range -5..5. Tabulating the raw
# scores only creates cells for scores that actually occurred, so reading a
# missing one (e.g. results[["-5"]]) stopped the script with
# "subscript out of bounds". With fixed factor levels every cell exists.
# The small offset keeps every count, and therefore every stored proportion,
# above zero: the proportions written by this run are the divisors of the
# percentage changes computed by the next run.
results <- table(factor(scores.df[,2], levels=-5:5)) + .0001

# Percentage change of `prop` relative to column `col` of the previous
# results row (last.results.alt)
pct.change <- function(prop, col)
{
   previous <- as.numeric(last.results.alt[[col]])
   (prop - previous) / previous * 100
}

# Get Table Results
# (no zero defaults are needed: every variable is assigned unconditionally)
neg5_Count <- results[["-5"]]
neg5_Prop <- neg5_Count / Total_Count
neg5_Change <- pct.change(neg5_Prop, 4)
neg4_Count <- results[["-4"]]
neg4_Prop <- neg4_Count / Total_Count
neg4_Change <- pct.change(neg4_Prop, 5)
neg3_Count <- results[["-3"]]
neg3_Prop <- neg3_Count / Total_Count
neg3_Change <- pct.change(neg3_Prop, 6)
neg2_Count <- results[["-2"]]
neg2_Prop <- neg2_Count / Total_Count
neg2_Change <- pct.change(neg2_Prop, 7)
neg1_Count <- results[["-1"]]
neg1_Prop <- neg1_Count / Total_Count
neg1_Change <- pct.change(neg1_Prop, 8)
zero_Count <- results[["0"]]
zero_Prop <- zero_Count / Total_Count
zero_Change <- pct.change(zero_Prop, 9)
pos1_Count <- results[["1"]]
pos1_Prop <- pos1_Count / Total_Count
pos1_Change <- pct.change(pos1_Prop, 10)
pos2_Count <- results[["2"]]
pos2_Prop <- pos2_Count / Total_Count
pos2_Change <- pct.change(pos2_Prop, 11)
pos3_Count <- results[["3"]]
pos3_Prop <- pos3_Count / Total_Count
pos3_Change <- pct.change(pos3_Prop, 12)
pos4_Count <- results[["4"]]
pos4_Prop <- pos4_Count / Total_Count
pos4_Change <- pct.change(pos4_Prop, 13)
pos5_Count <- results[["5"]]
# these two were assigned to Pos5_Prop / Pos5_Change (capital P), so the
# pos5_Prop / pos5_Change columns written to the table were always 0
pos5_Prop <- pos5_Count / Total_Count
pos5_Change <- pct.change(pos5_Prop, 14)

# Get Negative, Neutral, and Positive Totals
Neg_Count_Alt <- neg5_Count + neg4_Count + neg3_Count + neg2_Count + neg1_Count
Neg_Prop_Alt <- Neg_Count_Alt / Total_Count
Neg_Change_Alt <- pct.change(Neg_Prop_Alt, 1)
Neu_Count_Alt <- zero_Count
Neu_Prop_Alt <- Neu_Count_Alt / Total_Count
Neu_Change_Alt <- pct.change(Neu_Prop_Alt, 2)
Pos_Count_Alt <- pos1_Count + pos2_Count + pos3_Count + pos4_Count + pos5_Count
Pos_Prop_Alt <- Pos_Count_Alt / Total_Count
Pos_Change_Alt <- pct.change(Pos_Prop_Alt, 3)

# Mean score over all tweets (score 0 contributes nothing to the sum)
Mean <- (-5*neg5_Count - 4*neg4_Count - 3*neg3_Count - 2*neg2_Count - neg1_Count +
         pos1_Count + 2*pos2_Count + 3*pos3_Count + 4*pos4_Count + 5*pos5_Count) / Total_Count

# One row in the column order of the results_10m_alt table
Feel_alt.df <- data.frame(Date,Time,Total_Count,Mean,Neg_Count_Alt,Neg_Prop_Alt,Neg_Change_Alt,Neu_Count_Alt,Neu_Prop_Alt,Neu_Change_Alt,Pos_Count_Alt,Pos_Prop_Alt,Pos_Change_Alt,
neg5_Count,neg5_Prop,neg5_Change,neg4_Count,neg4_Prop,neg4_Change,neg3_Count,neg3_Prop,neg3_Change,neg2_Count,neg2_Prop,neg2_Change,neg1_Count,neg1_Prop,neg1_Change,
zero_Count,zero_Prop,zero_Change,pos1_Count,pos1_Prop,pos1_Change,pos2_Count,pos2_Prop,pos2_Change,pos3_Count,pos3_Prop,pos3_Change,pos4_Count,pos4_Prop,pos4_Change,pos5_Count,pos5_Prop,pos5_Change)

# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned
dbWriteTable(results.con,name="results_10m_alt",Feel_alt.df,append=TRUE,overwrite=FALSE,row.names=FALSE)

【问题讨论】:

  • 可以看看 `evaluate` 包;也可以通过 `knitr` 来使用它(`knitr` 内部使用 `evaluate`)

标签: r error-handling try-catch


【解决方案1】:

使用 `try` 或 `tryCatch`(前者更简单,通常也足够用)。您在后面的代码中已经用到了 `tryCatch`,因此同样可以用它来处理出问题的查询。

【讨论】:

    【解决方案2】:

    这样的代码有很多错误是正常的!!

    1. 避免定义大量单独的变量,改用 list 或 data.frame 结构来汇总您的结果。
    2. 使用小函数把代码拆分成小块。这样之后更容易对每一块使用 `tryCatch`

    脚本结构

    你的程序应该是这样的:

      ## Outline of the script; `...` stands for the arguments left out here
      data <- load.tweets()                      ## read inputs
      scores <- score.sentiment(data,...)        ## clean data/extract info
      ratios <- compute.ratios(scores,data,...)  ## analysis
      save.results(ratios,data,...)              ## save results
    

    例如,这里我尝试分解 2 个部分:

    获取表格结果

    我在这里使用mapply,因为您对所有系数重复相同的语句。

    ## Percentage change of each score's share of tweets versus the previous run.
    ##
    ## Takes no arguments; it reads three objects from the enclosing
    ## environment (normally the global workspace):
    ##   results:          table of tweet counts, named by score
    ##   Total_Count:      total number of tweets
    ##   last.results.alt: previous results row; columns 4..14 hold the
    ##                     proportions for scores -5..5 (columns 1..3 are the
    ##                     negative/neutral/positive aggregates)
    ##
    ## Returns a numeric vector named "-5" .. "5". An entry is NA when the
    ## previous proportion is 0 or missing, because no relative change can be
    ## computed against such a baseline.
    compute.ratios <- function(){
      score.labels <- as.character(-5:5)
      prop.columns <- 4:14   ## skip the three aggregate columns
      mapply(function(x,y){
        ## a score that no tweet received has no cell in the table: count 0
        count <- if (x %in% names(results)) results[[x]] else 0
        prop <- count/Total_Count
        val <- as.numeric(last.results.alt[[y]])
        if (length(val) != 1 || is.na(val) || val == 0) return(NA_real_)
        (prop-val)/val*100
      },score.labels,prop.columns)
    }
    

    定义一个函数来加载数据

    ## Load the inputs of the analysis from the two MySQL databases.
    ##
    ## Returns a list with
    ##   Feel:             result of the `tweet_text` query on `tweets`
    ##   last.results.alt: most recent row of proportions in `results_10m_alt`
    ##   Total_Count:      number of cells in Feel (one per tweet, since the
    ##                     query selects a single column)
    ##
    ## Both connections are closed when the function exits, whether it
    ## returns normally or stops with an error.
    load.tweets <- function(){
      tweets.con <- dbConnect(MySQL(),user="xxxxxxxxxxxx",password="xxxxxxxxxxxx",
                              dbname="xxxxxxxxxx",host="xxxxxxxxxxxxxxxxxxxx.com")
      on.exit(dbDisconnect(tweets.con), add = TRUE)
      Feel <- dbGetQuery(tweets.con,"SELECT `tweet_text` 
                                 FROM `tweets` 
                                 WHERE `created_at` 
                                 BETWEEN timestamp(DATE_ADD(NOW(), INTERVAL 49 MINUTE)) 
                                 AND timestamp(DATE_ADD(NOW(), INTERVAL 60 MINUTE))")

      results.con <- dbConnect(MySQL(),user="xxxxxxxxxxx",password="xxxxxxxxxxxxxxxxxx",
                               dbname="xxxxxxxxxxxxxx",host="xxxxxxxxxxxxxxxxxx")
      on.exit(dbDisconnect(results.con), add = TRUE)
      last.results.alt <- dbGetQuery(results.con,"SELECT `Neg_Prop_Alt`,`Neu_Prop_Alt`,
                                                       `Pos_Prop_Alt`,`neg5_Prop`,`neg4_Prop`,
                                                       `neg3_Prop`,`neg2_Prop`,`neg1_Prop`,
                                                        `zero_Prop`,`pos1_Prop`,`pos2_Prop`,`pos3_Prop`,
                                                        `pos4_Prop`,`pos5_Prop` 
                                               FROM `results_10m_alt` 
                                               ORDER BY Date DESC LIMIT 1")

      ## Total_Count was computed but dropped before; returning it as an
      ## extra element keeps callers that use only the first two working
      list(Feel=Feel,last.results.alt =last.results.alt,
           Total_Count=length(as.matrix(Feel)))
    }
    

    【讨论】:

      【解决方案3】:

      您可以使用 `evaluate` 包。`knitr` 使用 `evaluate` 包来处理文学化编程(literate programming)文档。

      我们可以通过 `knitr` 来使用 `evaluate`

      一个简单的例子是脚本test.r

      x <- -1:5
      a <- 'a'
      x <- x + a  # deliberate error: numeric + character
      print(x)    # x is still -1:5, the failed assignment never happened
      

      我们可以使用 `knitr`:

      library(knitr)
      # Wrap the LINES of the script in a code chunk. source('test.r') would
      # run the script (and stop at its error) instead of supplying its text.
      # error=TRUE tells knitr to record an error and carry on with the next
      # statement rather than abort.
      knit(text = paste(c('```{r error=TRUE}', readLines('test.r'), '```'), collapse = '\n'))
       ## Error in x + a : non-numeric argument to binary operator
       # note the error has occurred, but evaluation proceeded
       # and x is defined
       x
       ## [1] -1  0  1  2  3  4  5
      

      以基本方式使用evaluate

       # remove x first, to show that evaluating the script defines it again
       rm(x)
       # library(knitr) alone does not put evaluate() on the search path
       library(evaluate)
       # readLines() already returns the character vector evaluate() expects
       evaluate(input = readLines('test.r'))
      
      ## [[1]]
      ## $src
      ## [1] "x <- -1:5\n"
      ## 
      ## attr(,"class")
      ## [1] "source"
      ## 
      ## [[2]]
      ## $src
      ## [1] "a <- 'a'\n"
      ## 
      ## attr(,"class")
      ## [1] "source"
      ## 
      ## [[3]]
      ## $src
      ## [1] "x <- x + a\n"
      ## 
      ## attr(,"class")
      ## [1] "source"
      ## 
      ## [[4]]
      ## <simpleError in x + a: non-numeric argument to binary operator>
      ## 
      ## [[5]]
      ## $src
      ## [1] "print(x)"
      ## 
      ## attr(,"class")
      ## [1] "source"
      ## 
      ## [[6]]
      ## [1] "[1] -1  0  1  2  3  4  5\n"
      
      # and x is defined!
      x
      ## [1] -1  0  1  2  3  4  5
      

      【讨论】:

        猜你喜欢
        • 2012-08-26
        • 2020-01-14
        • 1970-01-01
        • 1970-01-01
        • 2016-12-15
        • 1970-01-01
        • 2012-03-12
        • 2017-10-31
        • 2010-10-18
        相关资源
        最近更新 更多