Code for Quiz 5. More practice with dplyr functions.
Read the data in the file drug_cos.csv
into R and assign it to drug_cos.
# Read the drug company data and inspect its structure.
# The object must be named `drug_cos` (no leading dot): glimpse() below and
# every later snippet refer to it by that name.
drug_cos <- read_csv("https://estanny.com/static/week5/drug_cos.csv")
glimpse(drug_cos)
Rows: 104
Columns: 9
$ ticker <chr> "ZTS", "ZTS", "ZTS", "ZTS", "ZTS", "ZTS", "Z...
$ name <chr> "Zoetis Inc", "Zoetis Inc", "Zoetis Inc", "Z...
$ location <chr> "New Jersey; U.S.A", "New Jersey; U.S.A", "N...
$ ebitdamargin <dbl> 0.149, 0.217, 0.222, 0.238, 0.182, 0.335, 0....
$ grossmargin <dbl> 0.610, 0.640, 0.634, 0.641, 0.635, 0.659, 0....
$ netmargin <dbl> 0.058, 0.101, 0.111, 0.122, 0.071, 0.168, 0....
$ ros <dbl> 0.101, 0.171, 0.176, 0.195, 0.140, 0.286, 0....
$ roe <dbl> 0.069, 0.113, 0.612, 0.465, 0.285, 0.587, 0....
$ year <dbl> 2011, 2012, 2013, 2014, 2015, 2016, 2017, 20...
Use distinct() to subset distinct rows.
drug_cos %>%
distinct(year)
# A tibble: 8 x 1
year
<dbl>
1 2011
2 2012
3 2013
4 2014
5 2015
6 2016
7 2017
8 2018
Use count() to count observations by group.
drug_cos %>%
count(year)
# A tibble: 8 x 2
year n
* <dbl> <int>
1 2011 13
2 2012 13
3 2013 13
4 2014 13
5 2015 13
6 2016 13
7 2017 13
8 2018 13
# Tally the rows belonging to each company name.
count(drug_cos, name)
# A tibble: 13 x 2
name n
* <chr> <int>
1 AbbVie Inc 8
2 Allergan plc 8
3 Amgen Inc 8
4 Biogen Inc 8
5 Bristol Myers Squibb Co 8
6 ELI LILLY & Co 8
7 Gilead Sciences Inc 8
8 Johnson & Johnson 8
9 Merck & Co Inc 8
10 Mylan NV 8
11 PERRIGO Co plc 8
12 Pfizer Inc 8
13 Zoetis Inc 8
# One row per ticker/name pair, with the number of observations in `n`.
count(drug_cos, ticker, name)
# A tibble: 13 x 3
ticker name n
<chr> <chr> <int>
1 ABBV AbbVie Inc 8
2 AGN Allergan plc 8
3 AMGN Amgen Inc 8
4 BIIB Biogen Inc 8
5 BMY Bristol Myers Squibb Co 8
6 GILD Gilead Sciences Inc 8
7 JNJ Johnson & Johnson 8
8 LLY ELI LILLY & Co 8
9 MRK Merck & Co Inc 8
10 MYL Mylan NV 8
11 PFE Pfizer Inc 8
12 PRGO PERRIGO Co plc 8
13 ZTS Zoetis Inc 8
Use filter() to extract rows that meet criteria
# A tibble: 26 x 9
ticker name location ebitdamargin grossmargin netmargin ros
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 ZTS Zoet~ New Jer~ 0.222 0.634 0.111 0.176
2 ZTS Zoet~ New Jer~ 0.379 0.672 0.245 0.326
3 PRGO PERR~ Ireland 0.236 0.362 0.125 0.19
4 PRGO PERR~ Ireland 0.178 0.387 0.028 0.088
5 PFE Pfiz~ New Yor~ 0.634 0.814 0.427 0.51
6 PFE Pfiz~ New Yor~ 0.34 0.79 0.208 0.221
7 MYL Myla~ United ~ 0.228 0.44 0.09 0.153
8 MYL Myla~ United ~ 0.258 0.35 0.031 0.074
9 MRK Merc~ New Jer~ 0.282 0.615 0.1 0.123
10 MRK Merc~ New Jer~ 0.313 0.681 0.147 0.206
# ... with 16 more rows, and 2 more variables: roe <dbl>, year <dbl>
# A tibble: 52 x 9
ticker name location ebitdamargin grossmargin netmargin ros
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 ZTS Zoet~ New Jer~ 0.217 0.64 0.101 0.171
2 ZTS Zoet~ New Jer~ 0.238 0.641 0.122 0.195
3 ZTS Zoet~ New Jer~ 0.335 0.659 0.168 0.286
4 ZTS Zoet~ New Jer~ 0.379 0.672 0.245 0.326
5 PRGO PERR~ Ireland 0.226 0.345 0.127 0.183
6 PRGO PERR~ Ireland 0.157 0.371 0.059 0.104
7 PRGO PERR~ Ireland -0.791 0.389 -0.76 -0.877
8 PRGO PERR~ Ireland 0.178 0.387 0.028 0.088
9 PFE Pfiz~ New Yor~ 0.447 0.82 0.267 0.307
10 PFE Pfiz~ New Yor~ 0.359 0.807 0.184 0.247
# ... with 42 more rows, and 2 more variables: roe <dbl>, year <dbl>
# A tibble: 8 x 9
ticker name location ebitdamargin grossmargin netmargin ros roe
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 MYL Myla~ United ~ 0.245 0.418 0.088 0.161 0.146
2 MYL Myla~ United ~ 0.244 0.428 0.094 0.163 0.184
3 MYL Myla~ United ~ 0.228 0.44 0.09 0.153 0.209
4 MYL Myla~ United ~ 0.242 0.457 0.12 0.169 0.283
5 MYL Myla~ United ~ 0.243 0.447 0.09 0.133 0.089
6 MYL Myla~ United ~ 0.19 0.424 0.043 0.052 0.044
7 MYL Myla~ United ~ 0.272 0.402 0.058 0.121 0.054
8 MYL Myla~ United ~ 0.258 0.35 0.031 0.074 0.028
# ... with 1 more variable: year <dbl>
Use select() to select, rename and reorder columns
Select ticker, name and ros
# Keep only the ticker, name and ros columns, in that order.
select(drug_cos, ticker, name, ros)
# A tibble: 104 x 3
ticker name ros
<chr> <chr> <dbl>
1 ZTS Zoetis Inc 0.101
2 ZTS Zoetis Inc 0.171
3 ZTS Zoetis Inc 0.176
4 ZTS Zoetis Inc 0.195
5 ZTS Zoetis Inc 0.14
6 ZTS Zoetis Inc 0.286
7 ZTS Zoetis Inc 0.321
8 ZTS Zoetis Inc 0.326
9 PRGO PERRIGO Co plc 0.178
10 PRGO PERRIGO Co plc 0.183
# ... with 94 more rows
Use select() to exclude columns ticker, name and ros
# Drop ticker, name and ros; every other column is kept.
# All three names need a minus sign: a bare `ticker` makes select() keep only
# that column, which is not an exclusion.
# Result: 104 x 6 (location, ebitdamargin, grossmargin, netmargin, roe, year).
drug_cos %>%
  select(-ticker, -name, -ros)
# A tibble: 104 x 1
ticker
<chr>
1 ZTS
2 ZTS
3 ZTS
4 ZTS
5 ZTS
6 ZTS
7 ZTS
8 ZTS
9 PRGO
10 PRGO
# ... with 94 more rows
Use select() to rename and reorder columns
start with drug_cos THEN
change the name of location to headquarter
put the columns in this order: year, ticker, headquarter, netmargin, roe
# Reorder the columns and expose `location` under the name `headquarter`.
select(
  drug_cos,
  year,
  ticker,
  headquarter = location,
  netmargin,
  roe
)
# A tibble: 104 x 5
year ticker headquarter netmargin roe
<dbl> <chr> <chr> <dbl> <dbl>
1 2011 ZTS New Jersey; U.S.A 0.058 0.069
2 2012 ZTS New Jersey; U.S.A 0.101 0.113
3 2013 ZTS New Jersey; U.S.A 0.111 0.612
4 2014 ZTS New Jersey; U.S.A 0.122 0.465
5 2015 ZTS New Jersey; U.S.A 0.071 0.285
6 2016 ZTS New Jersey; U.S.A 0.168 0.587
7 2017 ZTS New Jersey; U.S.A 0.163 0.488
8 2018 ZTS New Jersey; U.S.A 0.245 0.694
9 2011 PRGO Ireland 0.123 0.248
10 2012 PRGO Ireland 0.127 0.236
# ... with 94 more rows
Use inputs from your quiz question filter and select and replace SEE QUIZ with inputs from your quiz and replace the ??? in the code
start with drug_cos
THEN select ticker, year and ebitdamargin
# A tibble: 16 x 3
ticker year ebitdamargin
<chr> <dbl> <dbl>
1 PFE 2011 0.371
2 PFE 2012 0.447
3 PFE 2013 0.634
4 PFE 2014 0.359
5 PFE 2015 0.289
6 PFE 2016 0.267
7 PFE 2017 0.353
8 PFE 2018 0.34
9 MRK 2011 0.305
10 MRK 2012 0.33
11 MRK 2013 0.282
12 MRK 2014 0.567
13 MRK 2015 0.298
14 MRK 2016 0.254
15 MRK 2017 0.278
16 MRK 2018 0.313
start with drug_cos
THEN select ticker, ros and roe. Change the name of roe to return_on_equity
# A tibble: 16 x 3
ticker ros return_on_equity
<chr> <dbl> <dbl>
1 MRK 0.15 0.114
2 MRK 0.182 0.113
3 MRK 0.123 0.089
4 MRK 0.409 0.248
5 MRK 0.136 0.096
6 MRK 0.117 0.092
7 MRK 0.162 0.063
8 MRK 0.206 0.199
9 LLY 0.22 0.306
10 LLY 0.239 0.273
11 LLY 0.255 0.290
12 LLY 0.153 0.138
13 LLY 0.14 0.162
14 LLY 0.159 0.185
15 LLY 0.096 -0.015
16 LLY 0.155 0.264
select ranges of columns
drug_cos %>%
select(ebitdamargin:netmargin)
# A tibble: 104 x 3
ebitdamargin grossmargin netmargin
<dbl> <dbl> <dbl>
1 0.149 0.61 0.058
2 0.217 0.64 0.101
3 0.222 0.634 0.111
4 0.238 0.641 0.122
5 0.182 0.635 0.071
6 0.335 0.659 0.168
7 0.366 0.666 0.163
8 0.379 0.672 0.245
9 0.216 0.343 0.123
10 0.226 0.345 0.127
# ... with 94 more rows
# Pick columns by position: the 4th through the 6th.
select(drug_cos, 4:6)
# A tibble: 104 x 3
ebitdamargin grossmargin netmargin
<dbl> <dbl> <dbl>
1 0.149 0.61 0.058
2 0.217 0.64 0.101
3 0.222 0.634 0.111
4 0.238 0.641 0.122
5 0.182 0.635 0.071
6 0.335 0.659 0.168
7 0.366 0.666 0.163
8 0.379 0.672 0.245
9 0.216 0.343 0.123
10 0.226 0.345 0.127
# ... with 94 more rows
select helper functions
starts_with("abc") matches columns that start with “abc”
ends_with("abc") matches columns that end with “abc”
contains("abc") matches columns that contain “abc”
# ticker plus every column whose name contains "locat".
select(drug_cos, ticker, contains("locat"))
# A tibble: 104 x 2
ticker location
<chr> <chr>
1 ZTS New Jersey; U.S.A
2 ZTS New Jersey; U.S.A
3 ZTS New Jersey; U.S.A
4 ZTS New Jersey; U.S.A
5 ZTS New Jersey; U.S.A
6 ZTS New Jersey; U.S.A
7 ZTS New Jersey; U.S.A
8 ZTS New Jersey; U.S.A
9 PRGO Ireland
10 PRGO Ireland
# ... with 94 more rows
# ticker plus every column whose name begins with "r".
select(drug_cos, ticker, starts_with("r"))
# A tibble: 104 x 3
ticker ros roe
<chr> <dbl> <dbl>
1 ZTS 0.101 0.069
2 ZTS 0.171 0.113
3 ZTS 0.176 0.612
4 ZTS 0.195 0.465
5 ZTS 0.14 0.285
6 ZTS 0.286 0.587
7 ZTS 0.321 0.488
8 ZTS 0.326 0.694
9 PRGO 0.178 0.248
10 PRGO 0.183 0.236
# ... with 94 more rows
# year plus every column whose name ends in "margin".
select(drug_cos, year, ends_with("margin"))
# A tibble: 104 x 4
year ebitdamargin grossmargin netmargin
<dbl> <dbl> <dbl> <dbl>
1 2011 0.149 0.61 0.058
2 2012 0.217 0.64 0.101
3 2013 0.222 0.634 0.111
4 2014 0.238 0.641 0.122
5 2015 0.182 0.635 0.071
6 2016 0.335 0.659 0.168
7 2017 0.366 0.666 0.163
8 2018 0.379 0.672 0.245
9 2011 0.216 0.343 0.123
10 2012 0.226 0.345 0.127
# ... with 94 more rows
Use group_by() to set up data for operations by group
Group by ticker
# Mark the rows as grouped by ticker; the data itself is unchanged.
group_by(drug_cos, ticker)
# A tibble: 104 x 9
# Groups: ticker [13]
ticker name location ebitdamargin grossmargin netmargin ros
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 ZTS Zoet~ New Jer~ 0.149 0.61 0.058 0.101
2 ZTS Zoet~ New Jer~ 0.217 0.64 0.101 0.171
3 ZTS Zoet~ New Jer~ 0.222 0.634 0.111 0.176
4 ZTS Zoet~ New Jer~ 0.238 0.641 0.122 0.195
5 ZTS Zoet~ New Jer~ 0.182 0.635 0.071 0.14
6 ZTS Zoet~ New Jer~ 0.335 0.659 0.168 0.286
7 ZTS Zoet~ New Jer~ 0.366 0.666 0.163 0.321
8 ZTS Zoet~ New Jer~ 0.379 0.672 0.245 0.326
9 PRGO PERR~ Ireland 0.216 0.343 0.123 0.178
10 PRGO PERR~ Ireland 0.226 0.345 0.127 0.183
# ... with 94 more rows, and 2 more variables: roe <dbl>, year <dbl>
# Mark the rows as grouped by year; the data itself is unchanged.
group_by(drug_cos, year)
# A tibble: 104 x 9
# Groups: year [8]
ticker name location ebitdamargin grossmargin netmargin ros
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 ZTS Zoet~ New Jer~ 0.149 0.61 0.058 0.101
2 ZTS Zoet~ New Jer~ 0.217 0.64 0.101 0.171
3 ZTS Zoet~ New Jer~ 0.222 0.634 0.111 0.176
4 ZTS Zoet~ New Jer~ 0.238 0.641 0.122 0.195
5 ZTS Zoet~ New Jer~ 0.182 0.635 0.071 0.14
6 ZTS Zoet~ New Jer~ 0.335 0.659 0.168 0.286
7 ZTS Zoet~ New Jer~ 0.366 0.666 0.163 0.321
8 ZTS Zoet~ New Jer~ 0.379 0.672 0.245 0.326
9 PRGO PERR~ Ireland 0.216 0.343 0.123 0.178
10 PRGO PERR~ Ireland 0.226 0.345 0.127 0.183
# ... with 94 more rows, and 2 more variables: roe <dbl>, year <dbl>
Use summarize() to calculate summary statistics
Maximum roe for all companies
drug_cos %>%
summarize( max_roe = max(roe))
# A tibble: 1 x 1
max_roe
<dbl>
1 1.31
Maximum roe for each year
# Largest roe within each year, reported as `max_roe`.
summarise(
  group_by(drug_cos, year),
  max_roe = max(roe)
)
# A tibble: 8 x 2
year max_roe
* <dbl> <dbl>
1 2011 0.451
2 2012 0.69
3 2013 1.13
4 2014 0.828
5 2015 1.31
6 2016 1.11
7 2017 0.932
8 2018 0.694
Maximum roe for each ticker
# Largest roe within each ticker, reported as `max_roe`.
summarise(
  group_by(drug_cos, ticker),
  max_roe = max(roe)
)
# A tibble: 13 x 2
ticker max_roe
* <chr> <dbl>
1 ABBV 1.31
2 AGN 0.184
3 AMGN 0.585
4 BIIB 0.334
5 BMY 0.373
6 GILD 1.04
7 JNJ 0.244
8 LLY 0.306
9 MRK 0.248
10 MYL 0.283
11 PFE 0.342
12 PRGO 0.248
13 ZTS 0.694
Mean for year
Find the mean ros for each year
and call the variable mean_ros
Extract the mean for 2016
# A tibble: 1 x 2
year mean_ros
<dbl> <dbl>
1 2016 0.253
Median for year
Find the median ros for each year
and call the variable median_ros
Extract the median for 2016
# A tibble: 1 x 2
year median_ros
<dbl> <dbl>
1 2016 0.286
# Column chart of net margin by year for the rows with ticker "PFE".
# The y axis is labelled as percentages and both axis titles are suppressed.
ggplot(
  data = filter(drug_cos, ticker == "PFE"),
  mapping = aes(x = year, y = netmargin)
) +
  geom_col() +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Comparison of net margin",
    subtitle = "for Pfizer from 2011 to 2018",
    x = NULL,
    y = NULL
  ) +
  theme_classic()

# No `plot` argument is given, so ggsave() falls back to its default plot.
ggsave(
  filename = "preview.png",
  path = here::here("_posts", "2021-03-09-data-manipulation")
)