-
Notifications
You must be signed in to change notification settings - Fork 1
/
machinevisionscripts.R
279 lines (243 loc) · 13.4 KB
/
machinevisionscripts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
## A Dataset Documenting Representations of Machine Vision Technologies
## in Artworks, Games and Narratives
##
# Scripts for handling the data in R. Scripts written by Jill Walker Rettberg.
# March 18, 2022.
#
# The dataset can be found at: --------------------------------------------
# Rettberg, Jill Walker; Kronman, Linda; Solberg, Ragnhild; Gunderson, Marianne;
# Bjørklund, Stein Magne; Stokkedal, Linn Heidi; Jacob, Kurdin; Markham, Annette,
# 2022, "A Dataset Documenting Representations of Machine Vision Technologies
# in Artworks, Games and Narratives",
# https://doi.org/10.18710/2G0XKN, DataverseNO.
# Set working directory and load tidyverse---------------------------------------------------
# These scripts assume that the data files are in a subdirectory
# of your working directory called data.
#
#getwd()
#setwd()
# These scripts use the Tidyverse package. If you haven't already installed that,
# remove the # from the following line and run it.
#
#install.packages("tidyverse")
library(tidyverse)
# Import creativeworks ----------------------------------------------------
# Importing each file with factors etc
# Read data/creativeworks.csv (UTF-8) into CreativeWorks.
# WorkID is parsed as an integer and WorkTitle as text. Sentiment, Topic,
# TechRef and TechUsed are parsed as factors with the fixed level sets below.
# TechRef and TechUsed use the same level set, so it is written only once.
# local() keeps the helper vectors out of the global environment.
CreativeWorks <- local({
  sentiment_levels <- c(
    "Exciting", "Flawed", "Helpful", "Neutral", "Wondrous",
    "Hostile", "Oppressive", "Alien", "Creepy", "Subversive",
    "Dangerous", "Intrusive", "Empowering", "Protective",
    "Intimate", "Misleading", "Fun", "Overwhelming",
    "Prosocial", "Disgusting"
  )
  topic_levels <- c(
    "Nudity", "Social Media", "Romantic relationship", "Climate Change",
    "Dystopian", "Horror", "Robots/androids", "Surveillance", "Automation",
    "City", "Labour", "War", "Identity", "AI", "Animals", "Consciousness",
    "Nature", "Companionship", "Competition", "Playful", "Family",
    "Free will", "Physical violence", "Crime", "Hacking", "Conflict",
    "Empathy", "Utopian", "Race", "Sex", "Cyborgs", "Inequality",
    "Economy", "Grief", "Autonomous vehicles", "Gender"
  )
  tech_levels <- c(
    "Holograms", "Augmented reality", "Ocular implant",
    "Emotion recognition", "Surveillance cameras", "AI",
    "Virtual reality", "Motion tracking", "Body scans",
    "Drones", "MicroscopeOrTelescope", "Biometrics",
    "Image generation", "Facial recognition",
    "Object recognition", "3D scans", "Machine learning",
    "Filtering", "Deepfake", "Camera", "Cameraphone",
    "Interactive panoramas", "Non-Visible Spectrum", "UGV",
    "Webcams", "Satellite images"
  )

  read_csv(
    "data/creativeworks.csv",
    col_types = cols(
      WorkID = col_integer(),
      WorkTitle = col_character(),
      Sentiment = col_factor(levels = sentiment_levels),
      Topic = col_factor(levels = topic_levels),
      TechRef = col_factor(levels = tech_levels),
      TechUsed = col_factor(levels = tech_levels)
    ),
    locale = locale(encoding = "UTF-8")
  )
})
# Import characters.csv ---------------------------------------------------
# Read data/characters.csv (UTF-8) into Characters.
# CharacterID is an integer, Character is text, IsGroup and IsCustomizable
# are logical. The five trait columns are factors with fixed level sets;
# Age is an ordered factor (Child < Young Adult < Adult < Elderly < Unknown).
Characters <- local({
  character_spec <- cols(
    CharacterID = col_integer(),
    Character = col_character(),
    Species = col_factor(
      levels = c("Animal", "Cyborg", "Fictional", "Human", "Machine", "Unknown")
    ),
    Gender = col_factor(
      levels = c("Female", "Male", "Non-binary or Other", "Trans Woman",
                 "Unknown")
    ),
    RaceOrEthnicity = col_factor(
      levels = c("Asian", "Black", "Person of Colour", "White",
                 "Immigrant", "Indigenous", "Complex", "Unknown")
    ),
    Age = col_factor(
      levels = c("Child", "Young Adult", "Adult", "Elderly", "Unknown"),
      ordered = TRUE
    ),
    Sexuality = col_factor(
      levels = c("Homosexual", "Heterosexual", "Bi-sexual", "Other", "Unknown")
    ),
    IsGroup = col_logical(),
    IsCustomizable = col_logical()
  )

  read_csv(
    "data/characters.csv",
    col_types = character_spec,
    locale = locale(encoding = "UTF-8")
  )
})
# Simplify character traits -----------------------------------------------
# Change some of the variables to simplify for analysis. This example merges
# black, person of colour, indigenous, immigrant and complex into one category:
# PoC, and merges machines and cyborgs into a value called Robot. Since our
# categories for race and ethnicity do not follow the specific demographic
# categories of a country, and there are very few cases of some of the values
# (like indigenous), for some analyses combining categories will be better.
# The code can easily be adapted to combine or rename categories differently.
#
# Format here is:
# mutate(New_column_name = recode(Old_column_name, "Old value" = "New value"))
#
# Could remove customizable characters with this line:
# filter(IsCustomizable == FALSE) %>%
#
# Convert "Unknown" values to NA.
#
# Select relevant columns.
# Build the simplified Characters table used in the analyses below:
#   1. keep only the character name and the five trait columns,
#   2. turn every "Unknown" value into NA,
#   3. collapse RaceOrEthnicity into Asian / White / PoC and Species into
#      Human / Robot / Fictional / Animal.
#
# The input is the Characters table imported from data/characters.csv above.
# The script never defines Orig_Characters, so reading from that name stopped
# with an "object not found" error.
#
# "Unknown" is replaced one column at a time: passing a whole data frame to
# na_if() is rejected by dplyr >= 1.1.0. `%in%` never returns NA, so values
# that are already missing are left untouched.
Characters <- Characters %>%
  select(Character, Species, Gender, Sexuality,
         RaceOrEthnicity, Age) %>%
  mutate(across(everything(),
                ~ replace(.x, .x %in% "Unknown", NA))) %>%
  mutate(RaceOrEthnicity = recode(RaceOrEthnicity,
                                  "Asian" = "Asian",
                                  "Black" = "PoC",
                                  "White" = "White",
                                  "Person of Colour" = "PoC",
                                  "Indigenous" = "PoC",
                                  "Immigrant" = "PoC",
                                  "Complex" = "PoC")) %>%
  mutate(Species = recode(Species,
                          "Human" = "Human",
                          "Machine" = "Robot",
                          "Cyborg" = "Robot",
                          "Fictional" = "Fictional",
                          "Animal" = "Animal"))
# Load situations.csv -----------------------------------------------------
# Read data/situations.csv (UTF-8) into Situations.
# Column types use readr's compact codes: "i" = integer, "c" = character.
# SituationID is an integer; every other listed column is read as text.
Situations <- read_csv(
  "data/situations.csv",
  col_types = cols(
    SituationID = "i",
    Situation = "c",
    Genre = "c",
    Character = "c",
    Entity = "c",
    Technology = "c",
    Verb = "c"
  ),
  locale = locale(encoding = "UTF-8")
)
# Merge characters.csv with situations.csv to see character actions -------
# Make a new dataframe called Verbs that shows all characters with their traits
# (species, age etc), what situations they are in and what actions they take
# when interacting with machine vision.
# Full outer join of Situations and Characters on the Character column:
# rows from either table that have no match in the other are kept
# (all.x and all.y together are what all = TRUE stands for).
Verbs <- merge(
  x = Situations,
  y = Characters,
  by = "Character",
  all.x = TRUE,
  all.y = TRUE
)
# Contingency tables -----------------------------------------------------
# The following scripts transform Verbs into contingency tables where each row
# contains one verb and the number of times it is used in each of the genres (art,
# games, narratives) and who or what uses it. A new column has also been added
# which is TRUE if the verb is active (ends in -ing) and FALSE if it is passive
# (ends in -ed).
#
# Contingency table for verbs where a technology is the actor.
# One row per Verb; one count column per distinct Genre value and per
# distinct Technology value; `target` is TRUE when the verb ends in "ing".
#
# Steps:
#   - keep rows that have a Technology,
#   - stack Genre and Technology into a single `value` column,
#   - count each Verb/value pair (count() returns an ungrouped table),
#   - spread the values into columns, filling absent pairs with 0.
#
# The pattern is anchored ("ing$") so that only verbs ending in -ing are
# flagged, matching the rule used for SitCounts further down. An unanchored
# "ing" would also flag a verb that merely contains those letters.
Tech_verbs_contingency <- Verbs %>%
  filter(!is.na(Technology)) %>%
  select(Verb, Genre, Technology) %>%
  pivot_longer(cols = -Verb,
               names_to = "variable",
               values_to = "value") %>%
  count(Verb, value, name = "n") %>%
  pivot_wider(names_from = value,
              values_from = n,
              values_fill = 0L) %>% # a missing pair means a count of 0
  mutate(target = str_detect(Verb, "ing$"), .after = Verb) %>%
  relocate(Verb, target, Art, Game, Narrative) # put these cols first
# Contingency table for verbs where an entity is the actor.
# One row per Verb; one count column per distinct Genre value and per
# distinct Entity value; `target` is TRUE when the verb ends in "ing".
#
# Steps:
#   - keep rows that have an Entity,
#   - stack Genre and Entity into a single `value` column,
#   - count each Verb/value pair (count() returns an ungrouped table),
#   - spread the values into columns, filling absent pairs with 0.
#
# The pattern is anchored ("ing$") so that only verbs ending in -ing are
# flagged, matching the rule used for SitCounts further down. An unanchored
# "ing" would also flag a verb that merely contains those letters.
Entity_verbs_contingency <- Verbs %>%
  filter(!is.na(Entity)) %>%
  select(Verb, Genre, Entity) %>%
  pivot_longer(cols = -Verb,
               names_to = "variable",
               values_to = "value") %>%
  count(Verb, value, name = "n") %>%
  pivot_wider(names_from = value,
              values_from = n,
              values_fill = 0L) %>% # a missing pair means a count of 0
  mutate(target = str_detect(Verb, "ing$"), .after = Verb) %>%
  relocate(Verb, target, Art, Game, Narrative) # put these cols first
# Contingency table for verbs where a character is the actor.
# One row per Verb; one count column per distinct trait value found in
# Gender, Species, RaceOrEthnicity, Age and Sexuality; `target` is TRUE
# when the verb ends in "ing".
#
# The trait columns are imported as factors with different level sets, and
# Age is an ordered factor. pivot_longer() may refuse to stack such columns
# into one, so they are converted to plain text first. The values only end
# up as column names, so nothing is lost.
#
# count() returns an ungrouped table, and absent Verb/value pairs are
# filled with 0 while spreading. The pattern is anchored ("ing$") so that
# only verbs ending in -ing are flagged, matching the rule used for
# SitCounts further down.
Character_verbs_contingency <- Verbs %>%
  filter(!is.na(Character)) %>%
  select(Verb, Gender, Species, RaceOrEthnicity, Age, Sexuality) %>%
  mutate(across(-Verb, as.character)) %>%
  pivot_longer(cols = -Verb,
               names_to = "variable",
               values_to = "value") %>%
  count(Verb, value, name = "n") %>%
  pivot_wider(names_from = value,
              values_from = n,
              values_fill = 0L) %>% # a missing pair means a count of 0
  mutate(target = str_detect(Verb, "ing$"), .after = Verb)
# Save the three contingency tables as CSV files in the data directory.
# The tables and file paths are paired by position and written in the order
# tech, character, entity.
walk2(
  list(
    Tech_verbs_contingency,
    Character_verbs_contingency,
    Entity_verbs_contingency
  ),
  c(
    "data/tech_verbs_contingency.csv",
    "data/character_verbs_contingency.csv",
    "data/entity_verbs_contingency.csv"
  ),
  write_csv
)
# Make contingency table from situations showing verb use-----------------------------------
# Make contingency table SitCounts with count of times each verb is used by
# a Character, a Technology or an Entity. Then add a column VerbType stating
# whether Verb is active or passive.
# Count, for every Verb, how many situation rows have a Technology, a
# Character or an Entity filled in.
#   - Tech / Char / Ent are TRUE when the corresponding column is not NA.
#   - count(..., wt = value) sums those logical flags, i.e. counts the TRUEs.
#   - The result has one row per Verb with columns Char, Ent and Tech.
# VerbType is "Active" for verbs ending in "ing", "Passive" for verbs ending
# in "ed", and NA for anything else (case_when has no fallback branch).
#
# id_cols is passed by name: current tidyr deprecates supplying it
# positionally to pivot_wider().
SitCounts <- Situations %>%
  mutate(Tech = !is.na(Technology),
         Char = !is.na(Character),
         Ent = !is.na(Entity)) %>%
  select(Verb, Tech, Char, Ent) %>%
  pivot_longer(-Verb, names_to = "var", values_to = "value") %>%
  count(Verb, var, wt = value) %>%
  pivot_wider(id_cols = Verb, names_from = var, values_from = n) %>%
  mutate(VerbType = case_when(
    str_detect(Verb, "ing$") ~ "Active",
    str_detect(Verb, "ed$") ~ "Passive"))
# Transform worksinfo.csv to a wide format table --------------------------
# If you find it easier to work with a wide table, with a row for each work
# and columns for WorkID, Work_WikidataID, WorkTitle, Genre, Year, Creator,
# Country, URL and IsSciFi, you can use this code to generate it.
# Because many works have multiple creators, multiple countries and
# even multiple URLs, the code creates a separate numbered column for each
# creator, country and URL (Creator1, Creator2, ..., Country1, ..., URL1, ...).
# Reshape data/worksinfo.csv from long to wide format: one row per work.
# The code relies on the file having WorkID, Variable and Value columns.
#
# A work can have several Creator, Country or URL rows. Within each
# WorkID/Variable pair the rows are numbered (dup_id), and that number is
# appended to the variable name (Creator1, Creator2, ..., Country1, ...,
# URL1, ...) so that every value gets its own column after pivoting.
# All other variables keep their name unchanged.
#
# The numbering is not capped, so a work with more creators, countries or
# URLs than expected still gets unique column names instead of colliding
# when the table is pivoted.
#
# The final select() fixes the column order and keeps up to 8 creators,
# 5 countries and 3 URLs. It requires each of the listed columns to exist.
worksinfo <- read_csv("data/worksinfo.csv") %>%
  arrange(WorkID, Variable) %>%
  group_by(WorkID, Variable) %>%
  mutate(dup_id = row_number()) %>%
  ungroup() %>%
  mutate(
    Variable = as.character(Variable),
    Variable = if_else(
      Variable %in% c("Creator", "Country", "URL"),
      paste0(Variable, dup_id),
      Variable
    )
  ) %>%
  select(-dup_id) %>%
  pivot_wider(names_from = Variable,
              values_from = Value) %>%
  select(WorkID, Work_WikidataID, WorkTitle, Genre, Year, Creator1,
         Country1, URL1, URL2, URL3, Country2, Country3, Country4,
         Country5, Creator2, Creator3, Creator4, Creator5, Creator6,
         Creator7, Creator8, IsSciFi)