diff --git a/NEWS.md b/NEWS.md index 2e04d03..ce6a704 100755 --- a/NEWS.md +++ b/NEWS.md @@ -2,8 +2,12 @@ - `read.csv()` now uses `encoding = "UTF-8"` to better deal with non-ascii characters. +- Setting locale in queries via the `hl` argument now returns data (@marcf-91). For example, `gtrends(keyword = "Macron", geo = "FR", hl = "fr")`. + - It was difficult to maintain an up-to-date database of all country codes supported by Google because they do not provide such a list. `gtrends()` now only checks the syntax structure of the entered code. +- New Feature: `gtrends()` has a new parameter `compared_breakdown`. When set to `TRUE`, the relative hits across the keywords will be returned. Can only be used if one `geo` is used conjointly with more than one keyword. For example: `head(gtrends(keyword = c("nhl", "nba"), geo = "CA", compared_breakdown = TRUE)$interest_by_region)`. + # gtrendsR 1.4.8 - Skip internet-based tests on CRAN that were found to randomly fail on Debian machine and locally under Ubuntu-latest (#384). diff --git a/R/gtrends.R b/R/gtrends.R index 3599421..7b589c2 100755 --- a/R/gtrends.R +++ b/R/gtrends.R @@ -33,11 +33,16 @@ #' @param hl A string specifying the ISO language code (ex.: \dQuote{en-US} or #' \dQuote{fr}). Default is \dQuote{en-US}. Note that this is only influencing #' the data returned by related topics. -#' -#' @param tz A number specifying the minutes the returned dates should be offset to UTC. -#' Note the parameter 'time' above is specified in UTC. -#' E.g. choosing "time=2018-01-01T01 2018-01-01T03" and "tz=-120" will yield data between 2018-01-01T03 and 2018-01-01T05, -#' i.e. data specified to be in UTC+2. +#' +#' @param tz A number specifying the minutes the returned dates should be offset +#' to UTC. Note the parameter 'time' above is specified in UTC. E.g. choosing +#' "time=2018-01-01T01 2018-01-01T03" and "tz=-120" will yield data between +#' 2018-01-01T03 and 2018-01-01T05, i.e. data specified to be in UTC+2. 
+#' +#' @param compared_breakdown Logical. Should the results by +#' city and subregion be returned as a compared breakdown? Can only be used if one `geo` is used conjointly with +#' more than one keyword. If `TRUE`, the relative hits across the +#' keywords will be returned. `FALSE` by default. #' #' @param low_search_volume Logical. Should include low search volume regions? #' @@ -45,7 +50,7 @@ #' Default should work in general; should only be changed by advanced users. #' #' @param onlyInterest If you only want the interest over time set it to TRUE. -#' +#' #' @section Categories: The package includes a complete list of categories that #' can be used to narrow requests. These can be accessed using #' \code{data("categories")}. @@ -92,7 +97,6 @@ #' gtrends(c("NHL", "NFL"), time = "today+5-y") # last five years (default) #' gtrends(c("NHL", "NFL"), time = "all") # since 2004 #' -#' #' ## Custom date format #' #' gtrends(c("NHL", "NFL"), time = "2010-01-01 2010-04-03") @@ -106,6 +110,11 @@ #' #' head(gtrends("NHL", hl = "en")$related_topics) #' head(gtrends("NHL", hl = "fr")$related_topics) +#' +#' ## Compared breakdown +#' head(gtrends(keyword = c("nhl", "nba"), geo = "CA", compared_breakdown = FALSE)$interest_by_region) +#' head(gtrends(keyword = c("nhl", "nba"), geo = "CA", compared_breakdown = TRUE)$interest_by_region) +#' #' } #' @export gtrends <- function( @@ -115,6 +124,7 @@ gtrends <- function( gprop = c("web", "news", "images", "froogle", "youtube"), category = 0, hl = "en-US", + compared_breakdown = FALSE, low_search_volume = FALSE, cookie_url = "http://trends.google.com/Cookies/NID", tz=0, # This equals UTC @@ -147,17 +157,6 @@ gtrends <- function( } } - # if (geo != "" && - # !all(geo %in% - # c( - # as.character(countries[, "country_code"]), - # as.character(countries[, "sub_code"]) - # ))) { - # stop("Country code not valid. Please use 'data(countries)' to retrieve valid codes.", - # call. 
= FALSE - # ) - # } - ## Check if valid category if (!all(category %in% categories[, "id"])) { stop( @@ -171,6 +170,10 @@ gtrends <- function( stop("Cannot parse the supplied time format.", call. = FALSE) } + if (compared_breakdown & (length(geo) != 1 | length(keyword) == 1)) { + stop("`compared breakdown` can be only used with one geo and multiple keywords.", call. = FALSE) + } + if(!(is.numeric(tz))){ if (tz %in% OlsonNames()){ tz <- map_tz2min(tz) @@ -212,7 +215,7 @@ gtrends <- function( interest_over_time <- interest_over_time(widget, comparison_item,tz) if(!onlyInterest){ - interest_by_region <- interest_by_region(widget, comparison_item, low_search_volume,tz) + interest_by_region <- interest_by_region(widget, comparison_item, low_search_volume,compared_breakdown, tz) related_topics <- related_topics(widget, comparison_item, hl,tz) related_queries <- related_queries(widget, comparison_item,tz,hl) res <- list( diff --git a/R/zzz.R b/R/zzz.R index 56cfbcb..3bc9ebd 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -370,27 +370,18 @@ interest_over_time <- function(widget, comparison_item,tz) { } -interest_by_region <- function(widget, comparison_item, low_search_volume,tz) { - i <- which(grepl("geom_map", widget$id, ignore.case = TRUE) == TRUE) +interest_by_region <- function(widget, comparison_item, low_search_volume, compared_breakdown, tz) { + i <- which(grepl("geo_map", widget$id, ignore.case = TRUE) == TRUE) if (length(i) == 0) { return(list(NULL)) } - ## Interest by region need to be retrieved individually - - # resolution <- sub(".* (\\w+)$", "\\1", widget$title[i]) - # resolution[resolution == "subregion"] <- "region" - # resolution[resolution == "metro"] <- "dma" - - # resolution <- c(resolution, rep(c("city", "dma"), each = length(resolution))) - - ## resolution <- expand.grid(i, c(ifelse( grepl("world", na.omit(widget$geo)), "country", "region" ), "city", "dma"), stringsAsFactors = FALSE) - + resolution <- unique(resolution) i <- resolution$Var1 @@ -406,14 +397,14 
@@ interest_by_region <- function(widget, comparison_item, low_search_volume,tz) { # resolution[grepl("world", na.omit(widget$geo))] <- "country" resolution <- toupper(resolution) - res <- - mapply( - create_geo_payload, - i, - resolution, - MoreArgs = list(widget = widget, low_search_volume = low_search_volume, tz = tz), - SIMPLIFY = FALSE - ) +res <- + mapply( + create_geo_payload, + i, + resolution, + MoreArgs = list(widget = widget, low_search_volume = low_search_volume, compared_breakdown = compared_breakdown, tz = tz), + SIMPLIFY = FALSE + ) ## Remove duplicated ii <- !duplicated(res) @@ -432,7 +423,7 @@ interest_by_region <- function(widget, comparison_item, low_search_volume,tz) { } -create_geo_payload <- function(i, widget, resolution, low_search_volume,tz) { +create_geo_payload <- function(i, widget, resolution, compared_breakdown, low_search_volume, tz) { payload2 <- list() payload2$locale <- unique(na.omit(widget$request$locale)) payload2$comparisonItem <- widget$request$comparisonItem[[i]] @@ -442,21 +433,21 @@ create_geo_payload <- function(i, widget, resolution, low_search_volume,tz) { payload2$requestOptions$category <- widget$request$requestOptions$category[i] payload2$geo <- as.list((widget$request$geo[i, , drop = FALSE])) payload2$includeLowSearchVolumeGeos <- low_search_volume + + # If we want compared breakdown, it will return the relative hits per + # region/city when multiple keywords are provided. 
+ + if (compared_breakdown) { + payload2$dataMode = "PERCENTAGES" + } - url <- paste0(URLencode("https://www.google.com/trends/api/widgetdata/comparedgeo/csv?req="), - URLencode(jsonlite::toJSON(payload2, auto_unbox = T,null="list"),reserved = TRUE), - URLencode(paste0("&token=",widget$token[i],"&tz=",tz,"&hl=en-US"))) - - # url <- URLencode(paste0( - # "https://www.google.com/trends/api/widgetdata/comparedgeo/csv?req=", - # jsonlite::toJSON(payload2, auto_unbox = T,null="list"), - # "&token=", widget$token[i], - # "&tz=",tz,"&hl=en-US" - # )) - - # url <- encode_keyword(url) - # VY. use the handler with proxy options. + url <- paste0( + URLencode("https://www.google.com/trends/api/widgetdata/comparedgeo/csv?req="), + URLencode(jsonlite::toJSON(payload2, auto_unbox = T, null = "list"), reserved = TRUE), + URLencode(paste0("&token=", widget$token[i], "&tz=", tz, "&hl=en-US")) + ) + res <- curl::curl_fetch_memory(url, handle = .pkgenv[["cookie_handler"]]) if (res$status_code != 200) { diff --git a/man/gtrends.Rd b/man/gtrends.Rd index 527704e..df48be1 100755 --- a/man/gtrends.Rd +++ b/man/gtrends.Rd @@ -11,6 +11,7 @@ gtrends( gprop = c("web", "news", "images", "froogle", "youtube"), category = 0, hl = "en-US", + compared_breakdown = FALSE, low_search_volume = FALSE, cookie_url = "http://trends.google.com/Cookies/NID", tz = 0, @@ -48,15 +49,20 @@ using \code{gtrends("NHL", c("CA", "US"))}.} \dQuote{fr}). Default is \dQuote{en-US}. Note that this is only influencing the data returned by related topics.} +\item{compared_breakdown}{Logical. Should the results by +city and subregion be returned as a compared breakdown? Can only be used if one `geo` is used conjointly with +more than one keyword. If `TRUE`, the relative hits across the +keywords will be returned. `FALSE` by default.} + \item{low_search_volume}{Logical. Should include low search volume regions?} \item{cookie_url}{A string specifying the URL from which to obtain cookies. 
Default should work in general; should only be changed by advanced users.} -\item{tz}{A number specifying the minutes the returned dates should be offset to UTC. -Note the parameter 'time' above is specified in UTC. -E.g. choosing "time=2018-01-01T01 2018-01-01T03" and "tz=-120" will yield data between 2018-01-01T03 and 2018-01-01T05, -i.e. data specified to be in UTC+2.} +\item{tz}{A number specifying the minutes the returned dates should be offset +to UTC. Note the parameter 'time' above is specified in UTC. E.g. choosing +"time=2018-01-01T01 2018-01-01T03" and "tz=-120" will yield data between +2018-01-01T03 and 2018-01-01T05, i.e. data specified to be in UTC+2.} \item{onlyInterest}{If you only want the interest over time set it to TRUE.} } @@ -113,7 +119,6 @@ gtrends(c("NHL", "NFL"), time = "today 12-m") # last 12 months gtrends(c("NHL", "NFL"), time = "today+5-y") # last five years (default) gtrends(c("NHL", "NFL"), time = "all") # since 2004 - ## Custom date format gtrends(c("NHL", "NFL"), time = "2010-01-01 2010-04-03") @@ -127,5 +132,10 @@ head(gtrends(c("NHL", "NFL"), gprop = "youtube")$interest_over_time) head(gtrends("NHL", hl = "en")$related_topics) head(gtrends("NHL", hl = "fr")$related_topics) + +## Compared breakdown +head(gtrends(keyword = c("nhl", "nba"), geo = "CA", compared_breakdown = FALSE)$interest_by_region) +head(gtrends(keyword = c("nhl", "nba"), geo = "CA", compared_breakdown = TRUE)$interest_by_region) + } }