Skip to content

Commit

Permalink
Merge pull request #404 from PMassicotte/breakdown-feature
Browse files Browse the repository at this point in the history
Implement comparative city/subregion breakdown.
  • Loading branch information
PMassicotte authored Oct 24, 2021
2 parents 2c74fee + 8ddce1f commit 68b8cda
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 58 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@

- `read.csv()` now uses `encoding = "UTF-8"` to better deal with non-ascii characters.

- Setting locale in queries via the `hl`argument now returns data (@marcf-91). For example, `gtrends(keyword = "Macron", geo = "FR", hl = "fr")`.

- It was difficult to maintain an up-to-date database of all country codes supported by Google because they do not provide such a list. `gtrends()` now only checks the syntax structure of the entered code.

- New Feature: `gtrends()` as a new parameter `compared_breakdown`. When set to `TRUE`, then the relative hits across the keywords will be returned. Can only be used if one `geo` is used conjointly with more than one keyword. For example: `head(gtrends(keyword = c("nhl", "nba"), geo = "CA", compared_breakdown = TRUE)$interest_by_region)`.

# gtrendsR 1.4.8

- Skip internet-based tests on CRAN that were found to randomly fail on Debian machine and locally under Ubuntu-latest (#384).
Expand Down
41 changes: 22 additions & 19 deletions R/gtrends.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,24 @@
#' @param hl A string specifying the ISO language code (ex.: \dQuote{en-US} or
#' \dQuote{fr}). Default is \dQuote{en-US}. Note that this is only influencing
#' the data returned by related topics.
#'
#' @param tz A number specifying the minutes the returned dates should be offset to UTC.
#' Note the parameter 'time' above is specified in UTC.
#' E.g. choosing "time=2018-01-01T01 2018-01-01T03" and "tz=-120" will yield data between 2018-01-01T03 and 2018-01-01T05,
#' i.e. data specified to be in UTC+2.
#'
#' @param tz A number specifying the minutes the returned dates should be offset
#' to UTC. Note the parameter 'time' above is specified in UTC. E.g. choosing
#' "time=2018-01-01T01 2018-01-01T03" and "tz=-120" will yield data between
#' 2018-01-01T03 and 2018-01-01T05, i.e. data specified to be in UTC+2.
#'
#' @param compared_breakdown Logical. Should compare breakdown the results by
#' city and subregion? Can only be used if one `geo` is used conjointly with
#' more than one keyword. If `TRUE`, then the relative hits across the
#' keywords will be returned. `FALSE` by default.
#'
#' @param low_search_volume Logical. Should include low search volume regions?
#'
#' @param cookie_url A string specifying the URL from which to obtain cookies.
#' Default should work in general; should only be changed by advanced users.
#'
#' @param onlyInterest If you only want the interest over time set it to TRUE.
#'
#'
#' @section Categories: The package includes a complete list of categories that
#' can be used to narrow requests. These can be accessed using
#' \code{data("categories")}.
Expand Down Expand Up @@ -92,7 +97,6 @@
#' gtrends(c("NHL", "NFL"), time = "today+5-y") # last five years (default)
#' gtrends(c("NHL", "NFL"), time = "all") # since 2004
#'
#'
#' ## Custom date format
#'
#' gtrends(c("NHL", "NFL"), time = "2010-01-01 2010-04-03")
Expand All @@ -106,6 +110,11 @@
#'
#' head(gtrends("NHL", hl = "en")$related_topics)
#' head(gtrends("NHL", hl = "fr")$related_topics)
#'
#' ## Compared breakdown
#' head(gtrends(keyword = c("nhl", "nba"), geo = "CA", compared_breakdown = FALSE)$interest_by_region)
#' head(gtrends(keyword = c("nhl", "nba"), geo = "CA", compared_breakdown = TRUE)$interest_by_region)
#'
#' }
#' @export
gtrends <- function(
Expand All @@ -115,6 +124,7 @@ gtrends <- function(
gprop = c("web", "news", "images", "froogle", "youtube"),
category = 0,
hl = "en-US",
compared_breakdown = FALSE,
low_search_volume = FALSE,
cookie_url = "http://trends.google.com/Cookies/NID",
tz=0, # This equals UTC
Expand Down Expand Up @@ -147,17 +157,6 @@ gtrends <- function(
}
}

# if (geo != "" &&
# !all(geo %in%
# c(
# as.character(countries[, "country_code"]),
# as.character(countries[, "sub_code"])
# ))) {
# stop("Country code not valid. Please use 'data(countries)' to retrieve valid codes.",
# call. = FALSE
# )
# }

## Check if valid category
if (!all(category %in% categories[, "id"])) {
stop(
Expand All @@ -171,6 +170,10 @@ gtrends <- function(
stop("Cannot parse the supplied time format.", call. = FALSE)
}

if (compared_breakdown & (length(geo) != 1 | length(keyword) == 1)) {
stop("`compared breakdown` can be only used with one geo and multiple keywords.", call. = FALSE)
}

if(!(is.numeric(tz))){
if (tz %in% OlsonNames()){
tz <- map_tz2min(tz)
Expand Down Expand Up @@ -212,7 +215,7 @@ gtrends <- function(
interest_over_time <- interest_over_time(widget, comparison_item,tz)

if(!onlyInterest){
interest_by_region <- interest_by_region(widget, comparison_item, low_search_volume,tz)
interest_by_region <- interest_by_region(widget, comparison_item, low_search_volume,compared_breakdown, tz)
related_topics <- related_topics(widget, comparison_item, hl,tz)
related_queries <- related_queries(widget, comparison_item,tz,hl)
res <- list(
Expand Down
59 changes: 25 additions & 34 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -370,27 +370,18 @@ interest_over_time <- function(widget, comparison_item,tz) {
}


interest_by_region <- function(widget, comparison_item, low_search_volume,tz) {
i <- which(grepl("geom_map", widget$id, ignore.case = TRUE) == TRUE)
interest_by_region <- function(widget, comparison_item, low_search_volume, compared_breakdown, tz) {
i <- which(grepl("geo_map", widget$id, ignore.case = TRUE) == TRUE)

if (length(i) == 0) {
return(list(NULL))
}

## Interest by region need to be retrieved individually

# resolution <- sub(".* (\\w+)$", "\\1", widget$title[i])
# resolution[resolution == "subregion"] <- "region"
# resolution[resolution == "metro"] <- "dma"

# resolution <- c(resolution, rep(c("city", "dma"), each = length(resolution)))

##
resolution <-
expand.grid(i, c(ifelse(
grepl("world", na.omit(widget$geo)), "country", "region"
), "city", "dma"), stringsAsFactors = FALSE)

resolution <- unique(resolution)

i <- resolution$Var1
Expand All @@ -406,14 +397,14 @@ interest_by_region <- function(widget, comparison_item, low_search_volume,tz) {
# resolution[grepl("world", na.omit(widget$geo))] <- "country"
resolution <- toupper(resolution)

res <-
mapply(
create_geo_payload,
i,
resolution,
MoreArgs = list(widget = widget, low_search_volume = low_search_volume, tz = tz),
SIMPLIFY = FALSE
)
res <-
mapply(
create_geo_payload,
i,
resolution,
MoreArgs = list(widget = widget, low_search_volume = low_search_volume, compared_breakdown = compared_breakdown, tz = tz),
SIMPLIFY = FALSE
)

## Remove duplicated
ii <- !duplicated(res)
Expand All @@ -432,7 +423,7 @@ interest_by_region <- function(widget, comparison_item, low_search_volume,tz) {
}


create_geo_payload <- function(i, widget, resolution, low_search_volume,tz) {
create_geo_payload <- function(i, widget, resolution, compared_breakdown, low_search_volume, tz) {
payload2 <- list()
payload2$locale <- unique(na.omit(widget$request$locale))
payload2$comparisonItem <- widget$request$comparisonItem[[i]]
Expand All @@ -442,21 +433,21 @@ create_geo_payload <- function(i, widget, resolution, low_search_volume,tz) {
payload2$requestOptions$category <- widget$request$requestOptions$category[i]
payload2$geo <- as.list((widget$request$geo[i, , drop = FALSE]))
payload2$includeLowSearchVolumeGeos <- low_search_volume

# If we want compared breakdown, it will return the relative hits per
# region/city when multiple keywords are provided.

if (compared_breakdown) {
payload2$dataMode = "PERCENTAGES"
}


url <- paste0(URLencode("https://www.google.com/trends/api/widgetdata/comparedgeo/csv?req="),
URLencode(jsonlite::toJSON(payload2, auto_unbox = T,null="list"),reserved = TRUE),
URLencode(paste0("&token=",widget$token[i],"&tz=",tz,"&hl=en-US")))

# url <- URLencode(paste0(
# "https://www.google.com/trends/api/widgetdata/comparedgeo/csv?req=",
# jsonlite::toJSON(payload2, auto_unbox = T,null="list"),
# "&token=", widget$token[i],
# "&tz=",tz,"&hl=en-US"
# ))

# url <- encode_keyword(url)
# VY. use the handler with proxy options.
url <- paste0(
URLencode("https://www.google.com/trends/api/widgetdata/comparedgeo/csv?req="),
URLencode(jsonlite::toJSON(payload2, auto_unbox = T, null = "list"), reserved = TRUE),
URLencode(paste0("&token=", widget$token[i], "&tz=", tz, "&hl=en-US"))
)

res <- curl::curl_fetch_memory(url, handle = .pkgenv[["cookie_handler"]])

if (res$status_code != 200) {
Expand Down
20 changes: 15 additions & 5 deletions man/gtrends.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 68b8cda

Please sign in to comment.