generated from ebmdatalab/notebook-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Lithium PRIMARY and SECONDARY analysis.R
850 lines (690 loc) · 34.6 KB
/
Lithium PRIMARY and SECONDARY analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
## ANALYSIS:
# Installing packages needed:
install.packages("tidyverse")
library(tidyverse)
install.packages("dplyr")
library(dplyr)
install.packages("ggplot2")
library(ggplot2)
install.packages("scales")
library(scales)
install.packages("sf")
library(sf)
install.packages("here")
library(here)
install.packages("readr")
library(readr)
install.packages("data.table")
library(data.table)
install.packages("patchwork")
library(patchwork)
install.packages("paletteer")
library(paletteer)
install.packages("RColorBrewer")
library(RColorBrewer)
install.packages("gridExtra")
library(gridExtra)
install.packages("readxl")
library(readxl)
## SECONDARY ANALYSIS:
# Secondary care Dataset
# NEW DATASET TO BE WORKING FROM,
# Originally they were 26 organisations in the unlabelled region column, and 1 NA.
# Now using the new dataset on mergers there are 4, on thex-axis labelled region they come up as 'NA'kingdirectory = setwd("C:/Users/K22007671/OneDrive - King's College London")
setwd("C:/Users/hayleyschiffer/Desktop")
getwd()
Lithium_SCMD <- read_excel("scmd_lithium-2.xlsx")
New_Missingregions<-Lithium_SCMD %>%
filter(is.na(region)) %>%
group_by(ods_name) %>%
summarise(N=n())
print(New_Missingregions)
# Originally they were 26 organisations in the unlabelled region column, and 1 NA.
# Now using the new dataset on mergers there are 4, on thex-axis labelled region they come up as 'NA'
# Cleaning up dataset
# Manually adding the regions to the organisations (4) that come up in the NA region column
Lithium_SCMD <- Lithium_SCMD %>%
mutate(region = ifelse(ods_name == "Black Country Healthcare NHS Foundation Trust" & is.na(region),
"West Midlands",
region))
Lithium_SCMD <- Lithium_SCMD %>%
mutate(region = ifelse(ods_name == "Bradford District Care NHS Foundation Trust" & is.na(region),
"Yorkshire and The Humber",
region))
Lithium_SCMD <- Lithium_SCMD %>%
mutate(region = ifelse(ods_name == "Camden and Islington NHS Foundation Trust" & is.na(region),
"London",
region))
Lithium_SCMD <- Lithium_SCMD %>%
mutate(region = ifelse(ods_name == "Sheffield Health & Social Care NHS Foundation Trust" & is.na(region),
"Yorkshire and The Humber",
region))
unique(Lithium_SCMD$region) # ALL organisations are now attributed to a region
#converting from date-time to date, will make plotting easier
class(Lithium_SCMD$year_month) #[1] "POSIXct" "POSIXt"
Lithium_SCMD <- Lithium_SCMD %>%
mutate(year_month = as.Date(year_month)) # converting to date from date-time
Lithium_SCMD <- Lithium_SCMD %>%
filter(year_month %in% as.Date(c("2023-01-01",
"2023-02-01",
"2023-03-01",
"2023-04-01",
"2023-05-01",
"2023-06-01",
"2023-07-01",
"2023-08-01",
"2023-09-01",
"2023-10-01",
"2023-11-01",
"2023-12-01")))
Lithium_SCMD <- Lithium_SCMD %>%
filter(indicative_cost >= 0)
# changing from 9 to 7 regions, beause the primary care dataset has 7 regions:
# Step 1: Merging specific regions: North East and Yorkshire and the Humber and the east and west midlands
# Replace specific regions with broader region names
Lithium_SCMD$region_grouped <- with(Lithium_SCMD,
ifelse(region %in% c("East Midlands", "West Midlands"), "Midlands",
ifelse(region %in% c("North East", "Yorkshire and The Humber"), "North East & Yorkshire",
region)))
View(Lithium_SCMD)
unique(Lithium_SCMD$region_grouped)
# Removing ampoule from Lithium_SCMD$unit_dose_uom because lithium injectable is not used in secondary care Orla said
unique(Lithium_SCMD$unit_dose_uom)
Lithium_SCMD <- Lithium_SCMD %>%
filter(unit_dose_uom != "ampoule")
# Creating a quantity_mg column for regional analysis
quantity_mg = Lithium_SCMD$strnt_nmrtr_val * Lithium_SCMD$quantity
Lithium_SCMD <- Lithium_SCMD %>%
mutate(quantity_mg = quantity_mg)
# regional breakdown
# SPECIFYING - REGION ORDERS ON THE X-AXIS FOR MY PLOTS:
# Specify region orders
secondary_region_order <- c("London", "Midlands", "North West", "North East & Yorkshire", "South West", "South East", "East of England")
# Regional plot, vtm_name: Lithium carbonate vs Citrate for secondary care
# Secondary Care Plot
vtm_regional_secondary_plot <- ggplot(Lithium_SCMD, aes(x = factor(region_grouped, levels = secondary_region_order), y = quantity_mg / 1e6, fill = vtm_name)) +
geom_bar(stat = "identity", width = 0.7) + # Adjust bar width for thinner bars
labs(title = "Secondary Care: Regional Breakdown of Lithium Usage",
subtitle = "Analysis of Lithium (mg) usage by region for secondary care settings 2023",
x = "Region",
y = "Lithium usage (millions of mg)",
fill = "Lithium Carbonate | Lithium Citrate") +
scale_y_continuous(labels = label_comma(), expand = c(0, 0)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 12), # Larger x-axis labels
axis.title = element_text(size = 14, face = "bold"), # Bold axis titles
plot.title = element_text(size = 16, face = "bold"), # Larger and bold title
plot.subtitle = element_text(size = 12), # Subtitle
legend.title = element_text(size = 12, face = "bold"), # Bold legend title
legend.text = element_text(size = 10))
plot(vtm_regional_secondary_plot)
# Regional plot, vmp_name: Lithium carbonate vs Citrate products
vmp_regional_secondary_plot = ggplot(Lithium_SCMD, aes(x = factor(region_grouped, levels = secondary_region_order), y = quantity_mg / 1e6, fill = vmp_name)) +
geom_bar(stat = "identity", width = 0.7) + # Thinner bars
labs(title = "Secondary Care: Regional Breakdown of Lithium Usage",
x = "Region",
y = "Lithium usage (millions of mg)", # Y-axis label unchanged
fill = "VMP Name") +
scale_y_continuous(labels = scales::label_comma()) + # Ensure y-axis labels are formatted with commas
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 12), # Larger x-axis labels
axis.title = element_text(size = 14, face = "bold"), # Bold axis titles
plot.title = element_text(size = 16, face = "bold"), # Larger and bold title
plot.subtitle = element_text(size = 12), # Subtitle
legend.title = element_text(size = 12, face = "bold"), # Bold legend title
legend.text = element_text(size = 10)) # Legend text size
plot(vmp_regional_secondary_plot)
# organisational breakdown
# Organisation breakdown in MGs for Lithium Carbonate and Citrate:
# PLOTTING top 50 organisations to help with readability:
top_50_data <- Lithium_SCMD %>%
group_by(ods_name) %>%
summarise(total_quantity = sum(quantity_mg)) %>%
arrange(desc(total_quantity)) %>%
slice(1:50)
top_50_data <- top_50_data %>%
inner_join(Lithium_SCMD, by = "ods_name")
# Plotting top 50 organisations: Organisation breakdown in MGs for Lithium Carbonate and Citrate - vtm
ggplot(top_50_data, aes(x = reorder(ods_name, total_quantity), y = quantity_mg, fill = vtm_name)) +
geom_bar(stat = "identity") +
labs(title = "Top 50 Organisations' Lithium Usage 2023",
x = "Organisation (ods_name)",
y = "Lithium usage (mg)",
fill = "VTM Name") +
theme_minimal() +
theme(axis.text.y = element_blank(), # Hide y-axis labels
axis.text.x = element_text(angle = 90, hjust = 1, size = 12, face = "bold"), # Bold and larger x-axis labels
axis.title.x = element_text(size = 14, face = "bold"), # Bold x-axis title
axis.title.y = element_text(size = 14, face = "bold"), # Bold y-axis title
plot.title = element_text(size = 16, face = "bold"), # Bold title
plot.subtitle = element_text(size = 12, face = "bold"), # Bold subtitle (if needed)
legend.title = element_text(size = 12, face = "bold"), # Bold legend title
legend.text = element_text(size = 10)) # Legend text size
# Plotting top 50: Organisational breakdown in MGs for the products of Lithium citrate and carbonate - vmp
ggplot(top_50_data, aes(x = reorder(ods_name, total_quantity), y = quantity_mg, fill = vmp_name)) +
geom_bar(stat = "identity") +
labs(title = "Top 50 Organisations' Lithium Usage (products), 2023",
x = "Organisation (ods_name)",
y = "Lithium usage (mg)",
fill = "VMP Name") +
theme_minimal() +
theme(axis.text.y = element_blank(), # Hide y-axis labels
axis.text.x = element_text(angle = 90, hjust = 1, size = 12, face = "bold"), # Bold and larger x-axis labels
axis.title.x = element_text(size = 14, face = "bold"), # Bold x-axis title
axis.title.y = element_text(size = 14, face = "bold"), # Bold y-axis title
plot.title = element_text(size = 16, face = "bold"), # Bold title
plot.subtitle = element_text(size = 12, face = "bold"), # Bold subtitle (if needed)
legend.title = element_text(size = 12, face = "bold"), # Bold legend title
legend.text = element_text(size = 10)) # Legend text size
# Secondary care Mapping plot, 2023 mg / population
################################################
# NEW SHAPEFILE - it was in a python format so had to make RDS
library(sf)
# Read the GeoJSON file
geojson_file <- "/Users/K22007671/OneDrive - King's College London/Desktop/Research methods 1 2022/Represent/new_nuts_shp.txt"
geojson_data <- st_read(geojson_file)
# Save the object as an RDS file
saveRDS(geojson_data, file = "/Users/K22007671/OneDrive - King's College London/Desktop/Research methods 1 2022/Represent/new_nuts_shp.rds")
# Load the RDS file
nuts_shp <- readRDS( "/Users/K22007671/OneDrive - King's College London/Desktop/Research methods 1 2022/Represent/new_nuts_shp.rds")
View(nuts_shp)
# Ensuring geometries are valid
nuts_shp <- st_make_valid(nuts_shp)
View(nuts_shp)
########################################################
# Secondary care Mapping plot, focusing on all of 2023 mg / population
# getting the sum of mg per region
unique(Lithium_SCMD$region_grouped)
lithium_df<-Lithium_SCMD %>%
# getting the sum of mg per region
group_by(region_grouped) %>%
summarise(quantity_mg =sum(quantity_mg)) %>%
mutate(name = case_when(
region_grouped =="East of England"~"UKH",
region_grouped=="North West"~"UKD",
region_grouped=="North East & Yorkshire"~"UKE",
region_grouped=="London"~"UKF",
region_grouped=="Midlands"~"UKG",
region_grouped=="South East"~"UKJ",
region_grouped=="South West"~"UKK"),
region = as.factor(region_grouped)) %>%
filter(!is.na(region_grouped))
View(lithium_df)
twenty_three <- Lithium_SCMD %>%
filter(year_month %in% as.Date(c("2023-01-01",
"2023-02-01",
"2023-03-01",
"2023-04-01",
"2023-05-01",
"2023-06-01",
"2023-07-01",
"2023-08-01",
"2023-09-01",
"2023-10-01",
"2023-11-01",
"2023-12-01")))
View(twenty_three)
twenty_three <- twenty_three %>%
mutate(twentythree_quantity_mg = strnt_nmrtr_val * quantity)
# Calculate the total mg of Lithium for each region in 22023
total_mg_by_region <- twenty_three%>%
group_by(region_grouped) %>%
summarize(total_mg2023 = sum(twentythree_quantity_mg, na.rm = TRUE))
View(total_mg_by_region)
secondary_lithium_df <- lithium_df %>%
mutate(total_mg_by_region)
# Now adding population data to this dataset, that I got from population_data, estimates from ONS
NorthEast_Yorkt_pop = 8224302
NorthWest_pop = 7516113
midlands_pop = 10956592
east_pop = 6398497
london_pop = 8866180
SouthEast_pop = 9379833
SouthWest_pop = 5764881
secondary_lithium_df <- secondary_lithium_df %>%
mutate(population = case_when(
region == "North East & Yorkshire" ~ NorthEast_Yorkt_pop,
region == "North West" ~ NorthWest_pop,
region == "Midlands" ~ midlands_pop,
region == "East of England" ~ east_pop,
region == "London" ~ london_pop,
region == "South East" ~ SouthEast_pop,
region == "South West" ~ SouthWest_pop,
))
View(secondary_lithium_df)
secondary_lithium_df <- secondary_lithium_df %>%
mutate(`mg/population(2023)` = total_mg2023 / population)
# the names in the shape file and my regions in my lithium_df are different, so fixing this
mapping_table <- tibble(
name = c(
"LONDON COMMISSIONING REGION",
"SOUTH WEST COMMISSIONING REGION",
"SOUTH EAST COMMISSIONING REGION",
"MIDLANDS COMMISSIONING REGION",
"EAST OF ENGLAND COMMISSIONING REGION",
"NORTH WEST COMMISSIONING REGION",
"NORTH EAST AND YORKSHIRE COMMISSIONING REGION"
),
region = c(
"London",
"South West",
"South East",
"Midlands",
"East of England",
"North West",
"North East & Yorkshire"
)
)
library(dplyr)
# Join nuts_shp with the mapping table
nuts_shp <- nuts_shp %>%
left_join(mapping_table, by = "name")
# Join the datasets ~
coverage_data <- nuts_shp %>%
left_join(secondary_lithium_df, by = "region") # Join using the correct column names
View(coverage_data)
# Extract unique values for breaks and labels
unique_values <- sort(unique(coverage_data$`mg/population(2023)`))
breaks <- c(0, unique_values, max(unique_values, na.rm = TRUE) + 10)
labels <- scales::label_number(breaks)
# Plot with specified color scale
legend_labels <- c( "0" = "0",paste0(sort(round(secondary_lithium_df$`mg/population(2023)`, 1)), " mg/population"), "120" = "120")
coverage_plot <- coverage_data %>%
ggplot(aes(geometry = geometry, fill = `mg/population(2023)`)) +
geom_sf(lwd = 0.8, colour = 'black') +
geom_sf_label(aes(label = region), # Use `region` for labels
colour = "white",
label.size = 0.1,
label.r = unit(0.5, "lines"),
fun.geometry = st_centroid,
show.legend = FALSE) +
scale_fill_gradientn(
colors = c("#ffd13a", "#ff7c00", "#f20c51"),
breaks = breaks,
labels = labels
) +
theme_minimal() +
theme(
legend.position = c(0.2, 0.5),
legend.text = element_text(hjust = 1),
panel.background = element_rect(fill = "white")
) +
ggtitle("Secondary care: Total mg of lithium used in 2023 regionally/ population estimates") +
guides(fill = guide_legend(title = "mg Lithium/ population")) +
xlab("") +
ylab("")
# Display the plot
plot(coverage_plot)
# Acconpanying barplot: 2023/pop
view(secondary_lithium_df)
# ammended hist
barplot_mg_2023_secondary = ggplot(secondary_lithium_df, aes(x = region_grouped, y = `mg/population(2023)`)) +
geom_col(fill = "#FF0000" , color = "#FF6f6f")+
# Use geom_col() for bar plots where the height of bars is defined by y
geom_text(aes(label = round(`mg/population(2023)`, 1)), vjust = -0.3, size = 3.5) + # Add data labels
theme_minimal() +
xlab("Region") +
ylab("Lithium usage (mg) / population") +
ggtitle("Secondary Care: Total Regional Lithium Usage (mg) for 2023 / Population Estimates") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 10), #
axis.text.y = element_text(size = 10),
plot.title = element_text(size = 12, face = "bold"),
plot.margin = margin(10, 10, 10, 10) # Adjusting the plot margins for better spacing
)
plot(barplot_mg_2023_secondary)
### primary care analysis ###
##PRI##PRI##PRIMARY ANALYSIS:
PrimaryCare_Lithium= read.csv('df_primarycare.csv')
unique(PrimaryCare_Lithium$month) # STARTS FROM APRIL 2019 LIKE THE scmd DATASET
# I need to link practice codes to regions
Practise_codes= read.csv('practice_codes.csv')
# I NEED TO MERGE: PRACTICE of primarycare_lihtium to the column 'code' of practice codes
# Creating a merged dataset so now I have practice codes and their associated region
merged_data <- PrimaryCare_Lithium %>%
left_join(Practise_codes, by = c("practice" = "code"))
View(merged_data)
head(merged_data)
#strnt_nmrtr_val
df_primarycare2= read.csv('df_primarycare2.csv')
df_primarycare2 <- read_csv(file.choose())
View(df_primarycare2)
PRIMARYCARE_dataset <- merge(merged_data,
df_primarycare2[, c("bnf_code", "strnt_nmrtr_val")],
by = "bnf_code",
all.x = TRUE)
View(PRIMARYCARE_dataset)
unique(PRIMARYCARE_dataset$setting)
PRIMARYCARE_dataset <- subset(PRIMARYCARE_dataset, setting == 4) # So it just includes GPS
View(PRIMARYCARE_dataset)
PRIMARYCARE_dataset <- PRIMARYCARE_dataset %>%
mutate(month = as.Date(month))
PRIMARYCARE_dataset <- PRIMARYCARE_dataset %>%
filter(month %in% as.Date(c("2023-01-01",
"2023-02-01",
"2023-03-01",
"2023-04-01",
"2023-05-01",
"2023-06-01",
"2023-07-01",
"2023-08-01",
"2023-09-01",
"2023-10-01",
"2023-11-01",
"2023-12-01")))
# NOW I CAN CACULATE Quantity in mg of Lithium: because I have strnt_nmrtr_val and quantity
# HOW i calculated it last time:
quantity_mg = PRIMARYCARE_dataset$strnt_nmrtr_val * PRIMARYCARE_dataset$quantity
PRIMARYCARE_dataset <- PRIMARYCARE_dataset %>%
mutate(quantity_mg = quantity_mg)
View(PRIMARYCARE_dataset)
# ADDING to the dataset/ cleaning up:
# adding a chemical column, so I know whether it is, carbonate or citrate
# Assuming 'bnf_name' is the column with the product names in your dataset
library(dplyr)
PRIMARYCARE_dataset <- PRIMARYCARE_dataset %>%
mutate(chemical = case_when(
bnf_name %in% c("Camcolit 250 tablets",
"Camcolit 400 modified-release tablets",
"Liskonum 450mg modified-release tablets",
"Lithium carbonate 200mg modified-release tablets",
"Lithium carbonate 200mg/5ml oral suspension",
"Lithium carbonate 250mg tablets",
"Lithium carbonate 400mg modified-release tablets",
"Lithium carbonate 450mg modified-release tablets",
"Lithonate 400mg modified-release tablets",
"Priadel 200mg modified-release tablets",
"Priadel 400mg modified-release tablets") ~ "Lithium Carbonate",
bnf_name %in% c("Li-Liquid 1.018g/5ml oral solution",
"Li-Liquid 509mg/5ml oral solution",
"Lithium citrate 1.018g/5ml oral solution",
"Lithium citrate 509mg/5ml oral solution",
"Lithium citrate 520mg/5ml oral solution sugar free",
"Priadel 520mg/5ml liquid") ~ "Lithium Citrate",
TRUE ~ "Other" # TO see if anything isn't attributed to the two groups
))
unique(PRIMARYCARE_dataset$chemical)
# Primary care, regional PLOTS, using regional_team
unique(PRIMARYCARE_dataset$regional_team)
# adding a regions column:
# Assuming your dataset is stored in a data frame called df
# Create a named vector for the mapping of regional_team to regions
region_mapping <- c(
"Y60" = "Midlands",
"Y63" = "North Eastern Yorkshire",
"Y59" = "South East",
"Y61" = "East of England",
"Y62" = "North West",
"Y56" = "London",
"Y58" = "South West"
)
# Add the new 'regions' column by mapping the 'regional_team' column
PRIMARYCARE_dataset$Region <- region_mapping[PRIMARYCARE_dataset$regional_team]
unique(PRIMARYCARE_dataset$Region)
View(PRIMARYCARE_dataset)
# vmp analysis later on I will need vmp_names, using the bnf names - TO ALLOCATE CORRECT VMP_NAMES:
bnf_to_vmp <- data.frame(
bnf_name = c(
"Camcolit 250 tablets",
"Camcolit 400 modified-release tablets",
"Li-Liquid 1.018g/5ml oral solution",
"Li-Liquid 509mg/5ml oral solution",
"Liskonum 450mg modified-release tablets",
"Lithium carbonate 200mg modified-release tablets",
"Lithium carbonate 200mg/5ml oral suspension",
"Lithium carbonate 250mg tablets",
"Lithium carbonate 400mg modified-release tablets",
"Lithium carbonate 450mg modified-release tablets",
"Lithium citrate 1.018g/5ml oral solution",
"Lithium citrate 509mg/5ml oral solution",
"Lithium citrate 520mg/5ml oral solution sugar free",
"Lithonate 400mg modified-release tablets",
"Priadel 200mg modified-release tablets",
"Priadel 400mg modified-release tablets",
"Priadel 520mg/5ml liquid"
),
VMP_name = c(
"Lithium carbonate 250mg tablets (Camcolit brand)",
"Lithium carbonate 400mg modified-release tablets (Camcolit brand)",
"Lithium citrate 1.018g/5ml oral solution (Li-Liquid brand)",
"Lithium citrate 509mg/5ml oral solution (Li-Liquid brand)",
"Lithium carbonate 450mg modified-release tablets (Liskonum brand)",
"Lithium carbonate 200mg modified-release tablets",
"Lithium carbonate 200mg/5ml oral suspension",
"Lithium carbonate 250mg tablets",
"Lithium carbonate 400mg modified-release tablets",
"Lithium carbonate 450mg modified-release tablets",
"Lithium citrate 1.018g/5ml oral solution",
"Lithium citrate 509mg/5ml oral solution",
"Lithium citrate 520mg/5ml oral solution sugar free",
"Lithium carbonate 400mg modified-release tablets (Lithonate brand)",
"Lithium carbonate 200mg modified-release tablets (Priadel brand)",
"Lithium carbonate 400mg modified-release tablets (Priadel brand)",
"Lithium citrate 520mg/5ml oral solution (Priadel brand)"
)
)
# merge function
PRIMARYCARE_dataset <- merge(PRIMARYCARE_dataset, bnf_to_vmp, by = "bnf_name", all.x = TRUE)
View(PRIMARYCARE_dataset)
# Regional plot, vtm_name: Lithium carbonate vs Citrate for Primary care
# Lithium carbonate VS citrate use regionally in primary care plot
citrate_color <- "#E69F00"
carbonate_color <- "#56B4E9"
primary_region_order <- c("London", "Midlands", "North West", "North Eastern Yorkshire", "South West", "South East", "East of England")
vtm_regional_primary_plot <- ggplot(PRIMARYCARE_dataset, aes(x = factor(Region, levels = primary_region_order), y = quantity_mg / 1e6, fill = chemical)) + # chemical = vtm in this dataset
geom_bar(stat = "identity", width = 0.7) + # Adjust bar width for thinner bars
labs(title = "Primary Care: Regional Breakdown of Lithium Usage",
subtitle = "Analysis of Lithium (mg) usage by region for primary care settings 2023",
x = "Region",
y = "Lithium usage (millions of mg)",
fill = "Lithium Carbonate | Lithium Citrate") +
scale_y_continuous(labels = scales::label_comma(), expand = c(0, 0)) +
scale_fill_manual(values = c("Lithium Citrate" = citrate_color, "Lithium Carbonate" = carbonate_color)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 12), # Larger x-axis labels
axis.title = element_text(size = 14, face = "bold"), # Bold axis titles
plot.title = element_text(size = 16, face = "bold"), # Larger and bold title
plot.subtitle = element_text(size = 12), # Subtitle
legend.title = element_text(size = 12, face = "bold"), # Bold legend title
legend.text = element_text(size = 10))
# Now, I have both regional plots for vtm for primary and secondary 2023
combined_plot <- vtm_regional_primary_plot / vtm_regional_secondary_plot
plot(combined_plot)
# Regional plot, vmp_name: Lithium carbonate vs Citrate products in primary care
# Use a palette with 17 distinct colours - because in primary I have 17 VMPs
colors1 <- brewer.pal(12, "Set3") # 12 colors from Set3
colors2 <- brewer.pal(9, "Set1") # 9 colors from Set1
color_palette <- c(colors1, colors2) # Combine to create a palette of 21 colors
color_palette <- color_palette[1:17]
vmp_regional_primary_plot <- ggplot(PRIMARYCARE_dataset, aes(x = factor(Region, levels = primary_region_order), y = quantity_mg / 1e6, fill = VMP_name)) +
geom_bar(stat = "identity", width = 0.7) +
labs(title = "Primary Care: Regional Breakdown of Lithium Usage",
subtitle = "Detailed analysis of Lithium usage (mg) by VMP Name across different regions in primary care and secondary care 2023.",
x = "Region",
y = "Lithium usage (millions of mg)",
fill = "VMP Name") +
scale_y_continuous(labels = label_comma()) +
scale_fill_manual(values = color_palette) + # Apply the 17-color palette
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 12),
axis.title = element_text(size = 14, face = "bold"),
plot.title = element_text(size = 16, face = "bold"),
plot.subtitle = element_text(size = 12),
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 10),
legend.key.height = unit(1, "cm"), # Adjust the height of the legend keys
legend.key.width = unit(0.5, "cm")) + # Adjust the width of the legend keys for more squishing
guides(fill = guide_legend(ncol = 2, byrow = TRUE, label.position = "right", title.position = "top"))
# Print the primary plot
print(vmp_regional_primary_plot)
# depeding on preference:
combined_plot <- vmp_regional_primary_plot | vmp_regional_secondary_plot
combined_plot <- vmp_regional_primary_plot / vmp_regional_secondary_plot
# Now, I have both regional plots for vmp for primary and secondary 2023
# Print the combined plot
plot(combined_plot)
# Plotting Lithium carbonate and citrate products used regionally - generic vs product
#Classify BNF names into Generic or Branded
PRIMARYCARE_dataset <- PRIMARYCARE_dataset %>%
mutate(bnf_type = case_when( bnf_name %in% c("Lithium carbonate 250mg tablets", "Lithium carbonate 400mg modified-release tablets",
"Lithium carbonate 450mg modified-release tablets",
"Lithium carbonate 200mg modified-release tablets",
"Lithium carbonate 200mg/5ml oral suspension",
"Lithium citrate 520mg/5ml oral solution sugar free",
"Lithium citrate 1.018g/5ml oral solution",
"Lithium citrate 509mg/5ml oral solution") ~ "Generic",
bnf_name %in%c("Camcolit 250 tablets",
"Camcolit 400 modified-release tablets",
"Liskonum 450mg modified-release tablets",
"Priadel 400mg modified-release tablets",
"Priadel 200mg modified-release tablets",
"Priadel 520mg/5ml liquid",
"Li-Liquid 509mg/5ml oral solution",
"Li-Liquid 1.018g/5ml oral solution") ~ "Branded", TRUE ~ "Unknown" ))
# PLOT
ggplot(PRIMARYCARE_dataset, aes(x = reorder(Region, -quantity_mg), y = quantity_mg / 1e6, fill = bnf_type)) +
geom_bar(stat = "identity", width = 0.6) + # Adjust bar width for smaller body
labs(title = "Primary Care Regional Breakdown of Lithium Usage",
subtitle = "Analysis of lithium usage in Primary care: Branded vs Generic for 2023",
x = "Region",
y = "Lithium Usage (mg in millions)", # Updated y-axis label
fill = "BNF Type") +
scale_fill_manual(values = c("Generic" = "#00A651", "Branded" = "#0072B2")) +
scale_y_continuous(labels = scales::label_comma()) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 12), # Larger x-axis labels
axis.title = element_text(size = 14, face = "bold"), # Bold axis titles
plot.title = element_text(size = 16, face = "bold"), # Bold title
plot.subtitle = element_text(size = 12, face = "italic"), # Italic subtitle
legend.title = element_text(size = 12, face = "bold"), # Bold legend title
legend.text = element_text(size = 10)) # Legend text size
############################## NEW PCD mapping code for all of 2023 #######################
# Mapping plot, primary care all of 2023 - I need tge mg for 2023 and the population data
primary_lithium_df<-PRIMARYCARE_dataset %>%
# getting the sum of mg per region
group_by(Region) %>%
summarise(quantity_mg =sum(quantity_mg)) %>%
###
mutate(nuts118cd = case_when(Region =="East of England"~"UKH",
Region=="North West"~"UKD",
Region=="North Eastern Yorkshire"~"UKE",
Region=="London"~"UKF",
Region=="Midlands"~"UKG",
Region=="South East"~"UKJ",
Region=="South West"~"UKK"),
region = as.factor(Region)) %>%
filter(!is.na(Region))
PRIMARYCARE_dataset <- PRIMARYCARE_dataset %>%
mutate(month = as.Date(month))
unique(PRIMARYCARE_dataset$month)
PRIMARY_twenty_three <- PRIMARYCARE_dataset %>%
filter(month %in% as.Date(c("2023-01-01",
"2023-02-01",
"2023-03-01",
"2023-04-01",
"2023-05-01",
"2023-06-01",
"2023-07-01",
"2023-08-01",
"2023-09-01",
"2023-10-01",
"2023-11-01",
"2023-12-01")))
View(PRIMARY_twenty_three)
PRIMARY_twenty_three <- PRIMARY_twenty_three %>%
mutate(twenty_three_quantity_mg = strnt_nmrtr_val * quantity)
# Calculate the total mg of Lithium for each region in primary care 2023
primary_total_mg_by_region <- PRIMARY_twenty_three %>%
group_by(Region) %>%
summarize(total_mg_2023 = sum(twenty_three_quantity_mg, na.rm = TRUE))
View(primary_total_mg_by_region)
View(primary_lithium_df) # THIS IS the dataset i have been using for the mapping code,
#adding this column to of total_mg for 2023 for each region to lithium_df
primary_lithium_df <- primary_total_mg_by_region %>%
mutate(primary_total_mg_by_region)
View(primary_lithium_df)
# Now adding population data to this dataset, that I got from ONS statistics
NorthEast_Yorkt_pop = 8224302
NorthWest_pop = 7516113
midlands_pop = 10956592
east_pop = 6398497
london_pop = 8866180
SouthEast_pop = 9379833
SouthWest_pop = 5764881
primary_lithium_df <- primary_lithium_df %>%
mutate(population = case_when(
Region == "North Eastern Yorkshire" ~ NorthEast_Yorkt_pop,
Region == "North West" ~ NorthWest_pop,
Region == "Midlands" ~ midlands_pop,
Region == "East of England" ~ east_pop,
Region == "London" ~ london_pop,
Region == "South East" ~ SouthEast_pop,
Region == "South West" ~ SouthWest_pop,
))
View(primary_lithium_df)
# this is what I'll be plotting - NOW calculating the total_mg_lithium(mg)/ population for the first 6 months of 2022. (Making is more specific as it was in the last mapping code the 5 years of lihtium stock)
primary_lithium_df <- primary_lithium_df %>%
mutate(`mg/population(2023)` = total_mg_2023 / population)
View(primary_lithium_df)
# there are differences in the region names I need to fix, specifically in "North East & Yorkshire"/ "North Eastern Yorkshire"
View(nuts_shp)
View(primary_lithium_df)
mapping <- data.frame(
lithium_region = c( "North Eastern Yorkshire", "London", "Midlands", "East of England", "South East", "South West", "North West"),
nuts_shp_region = c( "North East & Yorkshire", "London", "Midlands", "East of England", "South East", "South West", "North West"),
stringsAsFactors = FALSE
)
# Join on the correct column names
primary_lithium_df <- primary_lithium_df %>%
left_join(mapping, by = c("Region" = "lithium_region"))
# Inspect the joined dataset
primary_coverage_data <- nuts_shp %>%
left_join(primary_lithium_df, by = c("region" = "nuts_shp_region"))
View(primary_coverage_data)
unique_values <- sort(unique(primary_lithium_df$`mg/population(2023)`))
# Define labels for the legend
legend_labels <- scales::label_number()(unique_values)
# Plot with specified color scale
coverage_plot <- primary_coverage_data %>%
ggplot(aes(geometry = geometry, fill = `mg/population(2023)`)) +
geom_sf(lwd = 0.8, colour = 'black') +
geom_sf_label(aes(label = region), # Use `region` for labels
colour = "white",
label.size = 0.1,
label.r = unit(0.5, "lines"),
fun.geometry = st_centroid,
show.legend = FALSE) +
scale_fill_gradientn(
colors = c("#00A651", "#0072B2", "#CC79A7"),
breaks = unique_values, # Use unique values for breaks
labels = legend_labels # Use labels based on unique values
) +
theme_minimal() +
theme(
legend.position = c(0.2, 0.5),
legend.text = element_text(hjust = 1),
panel.background = element_rect(fill = "white")
) +
ggtitle("Primary Care: Total mg of lithium used in 2023 regionally/ population estimates") +
guides(fill = guide_legend(title = "mg Lithium/ population")) +
xlab("") +
ylab("")
# Display the plot
plot(coverage_plot)
barplot_mg_2023_primary = ggplot(primary_coverage_data, aes(x = region, y = `mg/population(2023)`)) +
geom_col(fill = "#008080" , color = "#E0FFFF")+ # Add border color and size
# Use geom_col() for bar plots where the height of bars is defined by y
geom_text(aes(label = round(`mg/population(2023)`, 1)), vjust = -0.3, size = 3.5) + # Add data labels
theme_minimal() +
xlab("Region") +
ylab("Lithium usage (mg) / population") +
ggtitle("Primary Care: Total Regional Lithium Usage (mg) for 2023 / Population Estimates") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 10), # Adjust angle and size of x-axis labels
axis.text.y = element_text(size = 10), # Adjust size of y-axis labels
plot.title = element_text(size = 12, face = "bold"), # Adjust title size and style
plot.margin = margin(10, 10, 10, 10) # Adjust plot margins for better spacing
)
plot(barplot_mg_2023_primary)
# now I have bar plots for 2023 for both primary and secondary
grid.arrange(barplot_mg_2023_primary, barplot_mg_2023_secondary, ncol = 1)