PMassicotte · PMassicotte · Oct 24, 2021 · Oct 23, 2021
diff --git a/NEWS.md b/NEWS.md
@@ -2,8 +2,12 @@
 
 - `read.csv()` now uses `encoding = "UTF-8"` to better deal with non-ascii characters.
 
+- Setting locale in queries via the `hl`argument now returns data (@marcf-91). For example, `gtrends(keyword = "Macron", geo = "FR", hl = "fr")`.
+
 - It was difficult to maintain an up-to-date database of all country codes supported by Google because they do not provide such a list. `gtrends()` now only checks the syntax structure of the entered code.
 
+- New Feature: `gtrends()` as a new parameter `compared_breakdown`. When set to `TRUE`, then the relative hits across the keywords will be returned. Can only be used if one `geo` is used conjointly with more than one keyword. For example: `head(gtrends(keyword = c("nhl", "nba"), geo = "CA", compared_breakdown = TRUE)$interest_by_region)`.
+
 # gtrendsR 1.4.8
 
 - Skip internet-based tests on CRAN that were found to randomly fail on Debian machine and locally under Ubuntu-latest (#384). 

diff --git a/R/gtrends.R b/R/gtrends.R
@@ -33,19 +33,24 @@
 #' @param hl A string specifying the ISO language code (ex.: \dQuote{en-US} or
 #'   \dQuote{fr}). Default is \dQuote{en-US}. Note that this is only influencing
 #'   the data returned by related topics.
-#'   
-#' @param tz A number specifying the minutes the returned dates should be offset to UTC. 
-#' Note the parameter 'time' above is specified in UTC. 
-#' E.g. choosing "time=2018-01-01T01 2018-01-01T03" and "tz=-120" will yield data between 2018-01-01T03 and 2018-01-01T05, 
-#' i.e. data specified to be in UTC+2.
+#'
+#' @param tz A number specifying the minutes the returned dates should be offset
+#'   to UTC. Note the parameter 'time' above is specified in UTC. E.g. choosing
+#'   "time=2018-01-01T01 2018-01-01T03" and "tz=-120" will yield data between
+#'   2018-01-01T03 and 2018-01-01T05, i.e. data specified to be in UTC+2.
+#'
+#' @param compared_breakdown Logical. Should compare breakdown the results by
+#'   city and subregion? Can only be used if one `geo` is used conjointly with
+#'   more than one keyword. If `TRUE`, then the relative hits across the
+#'   keywords will be returned. `FALSE` by default.
 #'
 #' @param low_search_volume Logical. Should include low search volume regions?
 #'
 #' @param cookie_url A string specifying the URL from which to obtain cookies.
 #'   Default should work in general; should only be changed by advanced users.
 #'
 #' @param onlyInterest If you only want the interest over time set it to TRUE.
-#' 
+#'
 #' @section Categories: The package includes a complete list of categories that
 #'   can be used to narrow requests. These can be accessed using
 #'   \code{data("categories")}.
@@ -92,7 +97,6 @@
 #' gtrends(c("NHL", "NFL"), time = "today+5-y") # last five years (default)
 #' gtrends(c("NHL", "NFL"), time = "all") # since 2004
 #'
-#'
 #' ## Custom date format
 #'
 #' gtrends(c("NHL", "NFL"), time = "2010-01-01 2010-04-03")
@@ -106,6 +110,11 @@
 #'
 #' head(gtrends("NHL", hl = "en")$related_topics)
 #' head(gtrends("NHL", hl = "fr")$related_topics)
+#'
+#' ## Compared breakdown
+#' head(gtrends(keyword = c("nhl", "nba"), geo = "CA", compared_breakdown = FALSE)$interest_by_region)
+#' head(gtrends(keyword = c("nhl", "nba"), geo = "CA", compared_breakdown = TRUE)$interest_by_region)
+#'
 #' }
 #' @export
 gtrends <- function(
@@ -115,6 +124,7 @@ gtrends <- function(
                     gprop = c("web", "news", "images", "froogle", "youtube"),
                     category = 0,
                     hl = "en-US",
+                    compared_breakdown = FALSE,
                     low_search_volume = FALSE,
                     cookie_url = "http://trends.google.com/Cookies/NID",
                     tz=0, # This equals UTC
@@ -147,17 +157,6 @@ gtrends <- function(
     }
   }
 
-  # if (geo != "" &&
-  #   !all(geo %in%
-  #     c(
-  #       as.character(countries[, "country_code"]),
-  #       as.character(countries[, "sub_code"])
-  #     ))) {
-  #   stop("Country code not valid. Please use 'data(countries)' to retrieve valid codes.",
-  #     call. = FALSE
-  #   )
-  # }
-
   ## Check if valid category
   if (!all(category %in% categories[, "id"])) {
     stop(
@@ -171,6 +170,10 @@ gtrends <- function(
     stop("Cannot parse the supplied time format.", call. = FALSE)
   }
 
+  if (compared_breakdown & (length(geo) != 1 | length(keyword) == 1)) {
+    stop("`compared breakdown` can be only used with one geo and multiple keywords.", call. = FALSE)
+  }
+
   if(!(is.numeric(tz))){
     if (tz %in% OlsonNames()){
       tz <- map_tz2min(tz)
@@ -212,7 +215,7 @@ gtrends <- function(
   interest_over_time <- interest_over_time(widget, comparison_item,tz)
 
   if(!onlyInterest){
-    interest_by_region <- interest_by_region(widget, comparison_item, low_search_volume,tz)
+    interest_by_region <- interest_by_region(widget, comparison_item, low_search_volume,compared_breakdown, tz)
     related_topics <- related_topics(widget, comparison_item, hl,tz)
     related_queries <- related_queries(widget, comparison_item,tz,hl)
     res <- list(

diff --git a/R/zzz.R b/R/zzz.R
@@ -370,27 +370,18 @@ interest_over_time <- function(widget, comparison_item,tz) {
 }
 
 
-interest_by_region <- function(widget, comparison_item, low_search_volume,tz) {
-  i <- which(grepl("geom_map", widget$id, ignore.case = TRUE) == TRUE)
+interest_by_region <- function(widget, comparison_item, low_search_volume, compared_breakdown, tz) {
+  i <- which(grepl("geo_map", widget$id, ignore.case = TRUE) == TRUE)
 
   if (length(i) == 0) {
     return(list(NULL))
   }
 
-  ## Interest by region need to be retrieved individually
-
-  # resolution <- sub(".* (\\w+)$", "\\1", widget$title[i])
-  # resolution[resolution == "subregion"] <- "region"
-  # resolution[resolution == "metro"] <- "dma"
-
-  # resolution <- c(resolution, rep(c("city", "dma"), each = length(resolution)))
-
-  ##
   resolution <-
     expand.grid(i, c(ifelse(
       grepl("world", na.omit(widget$geo)), "country", "region"
     ), "city", "dma"), stringsAsFactors = FALSE)
-
+  
   resolution <- unique(resolution)
 
   i <- resolution$Var1
@@ -406,14 +397,14 @@ interest_by_region <- function(widget, comparison_item, low_search_volume,tz) {
   # resolution[grepl("world", na.omit(widget$geo))] <- "country"
   resolution <- toupper(resolution)
 
-  res <-
-    mapply(
-      create_geo_payload,
-      i,
-      resolution,
-      MoreArgs = list(widget = widget, low_search_volume = low_search_volume, tz = tz),
-      SIMPLIFY = FALSE
-    )
+res <-
+  mapply(
+    create_geo_payload,
+    i,
+    resolution,
+    MoreArgs = list(widget = widget, low_search_volume = low_search_volume, compared_breakdown = compared_breakdown, tz = tz),
+    SIMPLIFY = FALSE
+  )
 
   ## Remove duplicated
   ii <- !duplicated(res)
@@ -432,7 +423,7 @@ interest_by_region <- function(widget, comparison_item, low_search_volume,tz) {
 }
 
 
-create_geo_payload <- function(i, widget, resolution, low_search_volume,tz) {
+create_geo_payload <- function(i, widget, resolution, compared_breakdown, low_search_volume, tz) {
   payload2 <- list()
   payload2$locale <- unique(na.omit(widget$request$locale))
   payload2$comparisonItem <- widget$request$comparisonItem[[i]]
@@ -442,21 +433,21 @@ create_geo_payload <- function(i, widget, resolution, low_search_volume,tz) {
   payload2$requestOptions$category <- widget$request$requestOptions$category[i]
   payload2$geo <- as.list((widget$request$geo[i, , drop = FALSE]))
   payload2$includeLowSearchVolumeGeos <- low_search_volume
+
+  # If we want compared breakdown, it will return the relative hits per
+  # region/city when multiple keywords are provided.
+
+  if (compared_breakdown) {
+    payload2$dataMode = "PERCENTAGES"
+  }
 
 
-  url <- paste0(URLencode("https://www.google.com/trends/api/widgetdata/comparedgeo/csv?req="),
-                URLencode(jsonlite::toJSON(payload2, auto_unbox = T,null="list"),reserved = TRUE),
-                URLencode(paste0("&token=",widget$token[i],"&tz=",tz,"&hl=en-US")))
-
-  # url <- URLencode(paste0(
-  #   "https://www.google.com/trends/api/widgetdata/comparedgeo/csv?req=",
-  #   jsonlite::toJSON(payload2, auto_unbox = T,null="list"),
-  #   "&token=", widget$token[i],
-  #   "&tz=",tz,"&hl=en-US"
-  # ))
-
-  # url <- encode_keyword(url)
-  # VY. use the handler with proxy options.
+  url <- paste0(
+    URLencode("https://www.google.com/trends/api/widgetdata/comparedgeo/csv?req="),
+    URLencode(jsonlite::toJSON(payload2, auto_unbox = T, null = "list"), reserved = TRUE),
+    URLencode(paste0("&token=", widget$token[i], "&tz=", tz, "&hl=en-US"))
+  )
+
   res <- curl::curl_fetch_memory(url, handle = .pkgenv[["cookie_handler"]])
 
   if (res$status_code != 200) {

diff --git a/man/gtrends.Rd b/man/gtrends.Rd