R Markdown

# Load the sample text; readLines() yields one vector element per line.
tmptext <- readLines("./test1.txt")

#' Repair reversed curly double quotes, line by line
#'
#' For every line, the positions of the left quote (U+201C) and the right
#' quote (U+201D) are paired in order of appearance. A pair whose left quote
#' comes after its right quote is treated as typed backwards: the offending
#' span is reported with `message()` and the two marks are exchanged.
#' Lines whose left and right quote counts differ are reported with
#' `message()` and returned unchanged.
#'
#' @param tmptext Character vector, one element per line of text (other
#'   atomic input is coerced with `as.character()`).
#' @return An unnamed character vector of the same length as `tmptext`
#'   holding the corrected lines; `character(0)` for empty input. `NA` lines
#'   and lines without curly quotes are passed through unchanged.
quote_checker <- function(tmptext) {
  # Written as escapes so the comparison does not depend on the encoding the
  # source file happens to be parsed with.
  open_quote <- "\u201c"
  close_quote <- "\u201d"
  lines <- as.character(tmptext)

  fix_line <- function(i) {
    line <- lines[i]

    # Split once into single characters and look the quotes up in that same
    # vector, so the located positions and the replacement indices always
    # use the same unit (also when combining marks precede a quote).
    chars <- strsplit(line, "")[[1]]
    open_pos <- which(chars == open_quote)
    close_pos <- which(chars == close_quote)

    if (length(open_pos) != length(close_pos)) {
      message("# line ", i, " quotes not paired, skipped")
      return(line)
    }

    # Pairs whose opening mark sits to the right of its closing mark.
    id <- which(open_pos > close_pos)
    if (length(id) == 0L) {
      return(line)
    }

    message("## line:", i, "wrong quote detected: ", length(id))
    for (j in id) {
      # Show the reversed span exactly as it appears in the input.
      message("### ", paste0(chars[close_pos[j]:open_pos[j]], collapse = ""))
    }

    # Exchange the two marks of every reversed pair.
    chars[close_pos[id]] <- open_quote
    chars[open_pos[id]] <- close_quote
    paste0(chars, collapse = "")
  }

  # vapply() + seq_along() keep the result a character vector for any input
  # length, including zero.
  vapply(seq_along(lines), fix_line, character(1))
}

  
# Check every line of the sample text; the returned lines are kept in `rslt`.
rslt <- quote_checker(tmptext)
## ## line:13wrong quote detected: 1
## ### ”‘原来,你认得。’林冲笑着说“
## ## line:19wrong quote detected: 2
## ### ”非标准运行代码“
## ### ”[聆听社区的声音](https://twitter.com/MilesMcBain/status/1144608295061090306)“
## ## line:21wrong quote detected: 1
## ### ”R 社区是世界上最友好的社区“
## ## line:27wrong quote detected: 1
## ### ”若为自由故,嘛都可以抛“
## ## line:31wrong quote detected: 6
## ### ”净“
## ### ”净“
## ### ”净“
## ### ”净土“
## ### ”数海“
## ### ”净“
## ## line:33wrong quote detected: 1
## ### ”我们的孩子不能白白牺牲“

# Save the checked lines, then run the checker on a second sample file and
# let the result print at top level.
writeLines(text = rslt, con = "./test1.fix.txt")
text2 <- readLines("./test.txt")
quote_checker(text2)
## ## line:5wrong quote detected: 1
## ### ”test2“
## # line 9 quotes not paired, skipped
## # line 11 quotes not paired, skipped
##  [1] ""          ""          "“test1”"   ""          "“test2”"  
##  [6] ""          "\"test3\"" ""          "”test4”"   ""         
## [11] "“test5“"   ""