rentrez icon indicating copy to clipboard operation
rentrez copied to clipboard

Bug fix: entrez_search doesn't run for rettype = "count"

Open maia-sh opened this issue 3 years ago • 1 comments

I am trying to run entrez_search with rettype = "count" and receive the error `Error in ans[[1]] : subscript out of bounds`. I did some debugging and found that the error comes from the internal functions behind entrez_search. I'll paste my debugging and my suggested changes to those functions (parse_esearch, parse_esearch.XMLInternalDocument, print.esearch) below. I could also submit a pull request if you'd like, though I'm sure it needs some edits, since I'm new to classes/methods.

library(rentrez)
library(XML)

# Shared query settings, reused by every call in this report.
db <- "pubmed"
term <- "aquilegia[TITLE]"
retmode <- "xml"
config <- NULL
retmax <- 0
usehistory <- FALSE
use_history <- FALSE

# Default rettype: runs
entrez_search(db = db, term = term, usehistory = usehistory)

# rettype = "count": doesn't run -- Error in ans[[1]] : subscript out of bounds
entrez_search(db = db, term = term, usehistory = usehistory, rettype = "count")

# Looking into entrez_search

# 1) Check for issue with API query --> No issue
# Runs (default rettype)
response_uilist <- rentrez:::make_entrez_query(
  "esearch",
  db = db, term = term,
  config = config, retmode = retmode,
  usehistory = usehistory
)

# Runs (rettype = "count")
response_count <- rentrez:::make_entrez_query(
  "esearch",
  db = db, term = term,
  config = config, retmode = retmode,
  usehistory = usehistory, rettype = "count"
)

# 2) Check for issue with response parsing --> No issue
# Runs
parsed_uilist <- rentrez:::parse_response(response_uilist, retmode)

# Runs
parsed_count <- rentrez:::parse_response(response_count, retmode)

# 3) Check for issue with esearch parsing

# Doesn't run
rentrez:::parse_esearch(parsed_uilist, history = use_history)

# Runs
rentrez:::parse_esearch.XMLInternalDocument(
  parsed_uilist,
  history = use_history
)

# Doesn't run
rentrez:::parse_esearch(parsed_count, history = use_history)

# Doesn't run: Error in ans[[1]] : subscript out of bounds
rentrez:::parse_esearch.XMLInternalDocument(
  parsed_count,
  history = use_history
)

# Doesn't run: Error: object of type 'externalptr' is not subsettable
rentrez:::parse_esearch.list(parsed_count, history = use_history)

# 4) Look into rentrez:::parse_esearch.XMLInternalDocument
x <- parsed_count

# Here's the issue: the parser reads every field of a full result
res <- list(
  ids = xpathSApply(x, "//IdList/Id", xmlValue),
  count = as.integer(xmlValue(x[["/eSearchResult/Count"]])),
  retmax = as.integer(xmlValue(x[["/eSearchResult/RetMax"]])),
  QueryTranslation = xmlValue(x[["/eSearchResult/QueryTranslation"]]),
  file = x
)

# Here's what's needed: only the count and the document itself
count <- as.integer(xmlValue(x[["/eSearchResult/Count"]]))
res <- list(count = count, file = x)

# Edit functions
# S3 generic: hands off to the parse_esearch method for the class of `x`.
parse_esearch <- function(x, history, rettype = NULL) {
  UseMethod("parse_esearch")
}

#' Parse an XML esearch response into an `esearch` object
#'
#' @param x Parsed XML document. Nodes are read with `x[[xpath]]` and
#'   `xpathSApply()`.
#' @param history If true, attach a `web_history` element built from the
#'   `QueryKey` and `WebEnv` nodes. Not consulted when `rettype` is `"count"`.
#' @param rettype Optional string. `"count"` reads only the `Count` node; any
#'   other value, including the default `NULL`, parses the full result.
#' @return A list of class `c("esearch", "list")`. For `rettype = "count"` it
#'   holds `count` and `file`; otherwise `ids`, `count`, `retmax`,
#'   `QueryTranslation`, `file` and, when `history` is true, `web_history`.
parse_esearch.XMLInternalDocument <- function(x, history, rettype = NULL) {
  # identical() is used instead of `==` so that the default rettype = NULL
  # does not reach `if` as a zero-length condition, which is an error.
  if (identical(rettype, "count")) {
    res <- list(
      count = as.integer(xmlValue(x[["/eSearchResult/Count"]])),
      file = x
    )
  } else {
    res <- list(
      ids = xpathSApply(x, "//IdList/Id", xmlValue),
      count = as.integer(xmlValue(x[["/eSearchResult/Count"]])),
      retmax = as.integer(xmlValue(x[["/eSearchResult/RetMax"]])),
      QueryTranslation = xmlValue(x[["/eSearchResult/QueryTranslation"]]),
      file = x
    )

    if (history) {
      res$web_history <- web_history(
        QueryKey = xmlValue(x[["/eSearchResult/QueryKey"]]),
        WebEnv = xmlValue(x[["/eSearchResult/WebEnv"]])
      )
    }
  }

  class(res) <- c("esearch", "list")
  res
}

# Test function
parsed_esearch <- parse_esearch.XMLInternalDocument(
  parsed_count,
  history = use_history,
  rettype = "count"
)

# Also adapt print function
x <- parsed_esearch

#' Print a short summary of an `esearch` object
#'
#' @param x A list with a `count` element. Unless its only names are `count`
#'   and/or `file`, it must also carry `ids` and `QueryTranslation`.
#' @param ... Unused; present for compatibility with the `print` generic.
#' @return `x`, invisibly.
print.esearch <- function(x, ...) {
  if (all(names(x) %in% c("count", "file"))) {
    # Count-only result: there are no IDs or query translation to report.
    # The newline keeps the console prompt off the end of the message.
    msg <- paste0("Entrez search result with ", x$count, " hits\n")
  } else {
    # Long translated queries are cut at 50 characters for display.
    if (nchar(x$QueryTranslation) > 50) {
      display_term <- paste(substr(x$QueryTranslation, 1, 50), "...")
    } else {
      display_term <- x$QueryTranslation
    }
    cookie_word <- if ("web_history" %in% names(x)) "a" else "no"
    msg <- paste(
      "Entrez search result with", x$count, "hits (object contains",
      length(x$ids), "IDs and", cookie_word,
      "web_history object)\n Search term (as translated): ", display_term, "\n"
    )
  }

  cat(msg)
  invisible(x)
}

# Call the adapted method directly on parsed_esearch to check its output
print.esearch(parsed_esearch)

maia-sh avatar Aug 21 '20 21:08 maia-sh

Hi @maia-sh ,

Thanks for this, and for getting into the mind-boggling world of the way these classes and functions interact with each other.

These solutions look sensible to me. Do you think you can get them into a pull request? If you do so, also let me know the details you'd like to be included for you as a contributor (name, email, etc.).

dwinter avatar Sep 29 '20 00:09 dwinter