Load the MOMA data

library(here)
library(readr)
library(janitor)
library(dplyr)
# Import the raw artworks file. The columns listed below are forced to parse
# as numbers; every other column keeps readr's guessed type. Column names are
# then normalised by janitor::clean_names() (e.g. BeginDate -> begin_date).
moma <- here::here("data", "artworks.csv") %>%
  read_csv(
    col_types = cols(
      BeginDate = col_number(),
      EndDate = col_number(),
      `Length (cm)` = col_number(),
      `Circumference (cm)` = col_number(),
      `Duration (sec.)` = col_number(),
      `Diameter (cm)` = col_number()
    )
  ) %>%
  clean_names()

# Show the cells readr could not parse with the chosen/guessed column types.
problems(moma)
# A tibble: 8 x 5
     row col       expected    actual file                                      
   <int> <chr>     <chr>       <chr>  <chr>                                     
1 103824 DateAcqu… "date like… 1998-… '/Users/bedricks/Dropbox/Work/Teaching/Sp…
2 128673 DateAcqu… "date like… 1977-… '/Users/bedricks/Dropbox/Work/Teaching/Sp…
3 131406 DateAcqu… "date like… 1998-… '/Users/bedricks/Dropbox/Work/Teaching/Sp…
4 131785 DateAcqu… "date like… 1975-… '/Users/bedricks/Dropbox/Work/Teaching/Sp…
5 132165 DateAcqu… "date like… 1998-… '/Users/bedricks/Dropbox/Work/Teaching/Sp…
6 132166 DateAcqu… "date like… 1998-… '/Users/bedricks/Dropbox/Work/Teaching/Sp…
7 132167 DateAcqu… "date like… 1995   '/Users/bedricks/Dropbox/Work/Teaching/Sp…
8 132169 DateAcqu… "date like… 1998-… '/Users/bedricks/Dropbox/Work/Teaching/Sp…

Basic cleaning, using stringr, of the gender variable, which refers to the gender of the artist (an empty () is used as a placeholder for “various artists”)

library(stringr)
# Summarise the artist gender field.
# NOTE(review): this assumes `gender` holds one parenthesised label per
# credited artist, e.g. "(Male) (Female) ()", with "()" meaning no gender is
# recorded -- confirm against the raw data.
moma <- moma %>% 
  mutate(
    # Number of artists with a recorded gender = number of non-empty
    # parenthesised groups. Counting letters instead is only correct once
    # every label has been collapsed to a single "F"/"M"; any other label
    # (e.g. "(Non-Binary)") would be counted once per letter.
    # Zero recorded genders is treated as missing.
    num_artists = na_if(str_count(gender, "\\([^()]+\\)"), 0),
    # Count the literal labels on the raw string (before recoding) so that
    # capital "F"/"M" letters inside other labels are never picked up.
    n_female_artists = str_count(gender, fixed("(female)", ignore_case = TRUE)),
    n_male_artists = str_count(gender, fixed("(male)", ignore_case = TRUE)),
    # Recode the labels to single letters, case-insensitively.
    gender = str_replace_all(gender, fixed("(female)", ignore_case = TRUE), "F"),
    gender = str_replace_all(gender, fixed("(male)", ignore_case = TRUE), "M"),
    # A single-artist gender is only assigned when exactly one artist has a
    # recorded gender; every other case falls through to NA.
    artist_gender = case_when(
      num_artists == 1 & n_female_artists == 1 ~ "Female",
      num_artists == 1 & n_male_artists == 1 ~ "Male"
    )
  )

Let’s also do some detecting of strings in the credit_line variable.

# Flag acquisition methods mentioned in the credit line. Each flag is a
# case-insensitive literal substring match, so several flags can be TRUE for
# the same work, and a missing credit_line yields NA for all three.
moma <- mutate(
  moma,
  purchase = str_detect(credit_line, fixed("purchase", ignore_case = TRUE)),
  gift     = str_detect(credit_line, fixed("gift", ignore_case = TRUE)),
  exchange = str_detect(credit_line, fixed("exchange", ignore_case = TRUE))
)

According to MOMA: Acquisitions to the Collection may be made by purchase, gift, fractional interest gift, bequest, or exchange.

Let’s clean up some dates:

library(lubridate)
# Derive year columns and give the artist date columns descriptive names.
moma <- moma %>% 
  # Calendar year of acquisition; a missing date_acquired gives NA.
  mutate(year_acquired = year(date_acquired)) %>% 
  rename(artist_birth_year = begin_date, artist_death_year = end_date) %>% 
  mutate(
    # First run of four digits in the free-text `date` field, converted to
    # integer so it is numeric like the other year columns (str_extract()
    # alone returns character). NA when no four-digit run is present.
    year_created = as.integer(str_extract(date, "\\d{4}")),
    # A year of 0 is treated as missing
    # (presumably the raw file's placeholder for "unknown" -- TODO confirm).
    artist_birth_year = na_if(artist_birth_year, 0),
    artist_death_year = na_if(artist_death_year, 0)
  )

What different kinds of art classifications are available?

# Print every distinct classification value (n = Inf disables row truncation).
print(distinct(moma, classification), n = Inf)
# A tibble: 31 x 1
   classification                
   <chr>                         
 1 Architecture                  
 2 Mies van der Rohe Archive     
 3 Design                        
 4 Illustrated Book              
 5 Print                         
 6 Drawing                       
 7 Film                          
 8 Multiple                      
 9 Periodical                    
10 Photograph                    
11 Painting                      
12 (not assigned)                
13 Architectural Model           
14 Product Design                
15 Video                         
16 Media                         
17 Performance                   
18 Sculpture                     
19 Photography Research/Reference
20 Software                      
21 Installation                  
22 Work on Paper                 
23 Audio                         
24 Textile                       
25 Ephemera                      
26 Collage                       
27 Film (object)                 
28 Frank Lloyd Wright Archive    
29 Poster                        
30 Graphic Design                
31 Furniture and Interiors       

We want to focus on standard rectangular paintings:

library(tidyr)
# Keep only paintings that have both a height and a width recorded, each
# strictly positive. filter() already discards rows where a condition is NA,
# so the explicit is.na() checks just make the intent visible.
moma <- filter(
  moma,
  classification == "Painting",
  !is.na(height_cm),
  !is.na(width_cm),
  height_cm > 0,
  width_cm > 0
)

We’ll select the columns we want to keep:

# Reduce the data frame to the columns needed downstream: the title, every
# column whose name contains "artist", "year" or "_cm", the acquisition
# flags, and the classification/department labels.
moma <- select(
  moma,
  title,
  contains("artist"),
  contains("year"),
  contains("_cm"),
  purchase,
  gift,
  exchange,
  classification,
  department
)

Now let’s export this data frame for the lab.

# Write the cleaned data frame to data/artworks-cleaned.csv under the
# project root resolved by here::here().
moma %>%
  write_csv(here::here("data", "artworks-cleaned.csv"))

Creative Commons License