# Demographical and Media Research on the Filipino Canadian Community (Elections Canada Special Project)


# R code used for this research #

#################################
## INSTALL NECESSARY PACKAGES  ##
#################################
# Install only the packages that are not already present, so re-running the
# script does not download and reinstall every dependency each time.
required_packages <- c(
  "data.table", "funModeling", "Hmisc", "ggplot2", "maps",
  "tmap", "sf", "dplyr", "corrplot", "CatEncoders"
)
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

library(data.table)
library(funModeling)
library(Hmisc)
library(ggplot2)
library(maps)
library(tmap)
library(sf)
library(dplyr)
library(corrplot)
library(CatEncoders)

###############################
## IMPORT AND EXPLORE DATA  ##
###############################

# Read the scraped Statistics Canada CSV into a data.table.
Data <- fread("C:/Users/LB0009/Downloads/Scraped Data From Statistics Canada.csv")

# View() needs an interactive data viewer; skip it in batch runs so the
# script does not stop here when executed with Rscript.
if (interactive()) {
  View(Data)
}

# First/last rows and dimensions of the table.
head(Data)
tail(Data)
nrow(Data)
ncol(Data)

# Per-column overview from funModeling (zeros, NAs, types, unique values).
df_status(Data)
# There are two NA values. They are dealt with in the growth-rate section.

# Summary of Data
summary(Data)

# Distribution of numerical variables
plot_num(Data) # Graphical
profiling_num(Data) # Numerical

# Distribution of categorical variables
freq(Data)

#############################################
## PLOT DISTRIBUTION OF CERTAIN VARIABLES  ##
#############################################

###DISTRIBUTION OF NATIONAL FILIPINO POPULATION (2016 AND 2021)

# Histogram of the Filipino head count per electoral district, once for each
# census year, drawn with identical binning and colours.
for (census_year in c(2021, 2016)) {
  hist(
    Data[[paste0("Number of Filipinos (", census_year, " Census)")]],
    breaks = 100,
    col = "blue",
    border = "white",
    xlab = "Number of Filipinos",
    ylab = "Number of Districts",
    main = paste0(
      "Distribution of Filipino Population by Electoral District (",
      census_year,
      ")"
    )
  )
}

###DISTRIBUTION OF NATIONAL FILIPINO POPULATION, PROVINCES (2021)

# First, the data is split by province for ease of analysis.
# rows_for_province() returns the rows of `Data` whose `Province/Territory`
# value equals the given label exactly.
rows_for_province <- function(province_label) {
  in_province <- Data$`Province/Territory` == province_label
  Data[in_province, ]
}

NLData <- rows_for_province("Newfoundland and Labrador")
PEIData <- rows_for_province("PEI")
NovaScotiaData <- rows_for_province("Nova Scotia")
NewBrunswickData <- rows_for_province("New Brunswick")
QuebecData <- rows_for_province("Quebec")
OntarioData <- rows_for_province("Ontario")
ManitobaData <- rows_for_province("Manitoba")
SaskData <- rows_for_province("Saskatchewan")
AlbertaData <- rows_for_province("Alberta")
BCData <- rows_for_province("British Columbia")


#Now histograms can be plotted

# Histograms of the share of Filipinos per electoral district: all districts
# first, then one plot per province. Each entry gives the rows to plot, the
# suffix appended to the shared title, and the `breaks` argument ("Sturges"
# is hist()'s own default and is used where no bin count was chosen).
share_col <- "% Filipinos"
share_title <- "Proportional Distribution of Filipino Population by Electoral District (2021)"
share_panels <- list(
  list(rows = Data, suffix = "", breaks = "Sturges"),
  list(rows = NLData, suffix = "-Newfoundland and Labrador", breaks = "Sturges"),
  list(rows = PEIData, suffix = "-Prince Edward Island", breaks = "Sturges"),
  list(rows = NovaScotiaData, suffix = "-Nova Scotia", breaks = "Sturges"),
  list(rows = NewBrunswickData, suffix = "-New Brunswick", breaks = "Sturges"),
  list(rows = QuebecData, suffix = "-Quebec", breaks = 100),
  list(rows = OntarioData, suffix = "-Ontario", breaks = 100),
  list(rows = ManitobaData, suffix = "-Manitoba", breaks = "Sturges"),
  list(rows = SaskData, suffix = "-Saskatchewan", breaks = "Sturges"),
  list(rows = AlbertaData, suffix = "-Alberta", breaks = 50),
  list(rows = BCData, suffix = "-British Columbia", breaks = 50)
)

for (panel in share_panels) {
  hist(
    panel$rows[[share_col]],
    breaks = panel$breaks,
    col = "blue",
    border = "white",
    xlab = "% of Population that is Filipino",
    ylab = "Number of Districts",
    main = paste0(share_title, panel$suffix)
  )
}

###GROWTH RATE OF FILIPINO POPULATION PER DISTRICT, PROVINCES (2016-2021)

# Some districts may have had a Filipino population of 0 in 2016 but grew by
# 2021. Mathematically, no growth rate can be computed from a zero base, which
# is why those rows hold NA values.

Data_na <- Data[!complete.cases(Data), ]
# View() needs an interactive data viewer; skip it in batch runs so the
# script does not stop here when executed with Rscript.
if (interactive()) {
  View(Data_na)
}

# A district in New Brunswick and another in Quebec exhibit such behaviour.
# For the purpose of this specific analysis, both rows are dropped, from the
# national table and from the two affected provincial subsets.

Data_No_na <- Data[complete.cases(Data), ]
NewBrunswick_No_na <- NewBrunswickData[complete.cases(NewBrunswickData), ]
Quebec_No_na <- QuebecData[complete.cases(QuebecData), ]
df_status(Data_No_na)


#Completing the analysis


# Histograms of the 2016-2021 Filipino growth rate: all districts first, then
# one plot per province. The national, New Brunswick and Quebec plots use the
# NA-free tables; the other provinces use their plain subsets. "Sturges" is
# hist()'s own default and is used where no bin count was chosen.
growth_col <- "Filipino Growth Rate % (2016-2021)"
growth_title <- "Distribution of Filipino Growth by Electoral District (2021)"
growth_panels <- list(
  list(rows = Data_No_na, suffix = "", breaks = 100),
  list(rows = NLData, suffix = "-Newfoundland and Labrador", breaks = "Sturges"),
  list(rows = PEIData, suffix = "-Prince Edward Island", breaks = "Sturges"),
  list(rows = NovaScotiaData, suffix = "-Nova Scotia", breaks = "Sturges"),
  list(rows = NewBrunswick_No_na, suffix = "-New Brunswick", breaks = "Sturges"),
  list(rows = Quebec_No_na, suffix = "-Quebec", breaks = 100),
  list(rows = OntarioData, suffix = "-Ontario", breaks = 100),
  list(rows = ManitobaData, suffix = "-Manitoba", breaks = "Sturges"),
  list(rows = SaskData, suffix = "-Saskatchewan", breaks = "Sturges"),
  list(rows = AlbertaData, suffix = "-Alberta", breaks = 50),
  list(rows = BCData, suffix = "-British Columbia", breaks = 50)
)

for (panel in growth_panels) {
  hist(
    panel$rows[[growth_col]],
    breaks = panel$breaks,
    col = "blue",
    border = "white",
    xlab = "% Change from 2016",
    ylab = "Number of Districts",
    main = paste0(growth_title, panel$suffix)
  )
}

###PROPORTION OF RECENT IMMIGRANTS (2016-2021) THAT WERE FROM THE PHILIPPINES

# Histograms of the share of recent immigrants (2016-2021) who came from the
# Philippines: all districts first, then one plot per province. "Sturges" is
# hist()'s own default and is used where no bin count was chosen.
recent_imm_col <- "% of Recent Immigrants from PH (2016-2021)"
recent_imm_title <- "Distribution of Recent Filipino Immigrants by Electoral District (2021)"
recent_imm_panels <- list(
  list(rows = Data, suffix = "", breaks = 100),
  list(rows = NLData, suffix = "-Newfoundland and Labrador", breaks = "Sturges"),
  list(rows = PEIData, suffix = "-Prince Edward Island", breaks = "Sturges"),
  list(rows = NovaScotiaData, suffix = "-Nova Scotia", breaks = "Sturges"),
  list(rows = NewBrunswickData, suffix = "-New Brunswick", breaks = "Sturges"),
  list(rows = QuebecData, suffix = "-Quebec", breaks = 100),
  list(rows = OntarioData, suffix = "-Ontario", breaks = 100),
  list(rows = ManitobaData, suffix = "-Manitoba", breaks = "Sturges"),
  list(rows = SaskData, suffix = "-Saskatchewan", breaks = "Sturges"),
  list(rows = AlbertaData, suffix = "-Alberta", breaks = 50),
  list(rows = BCData, suffix = "-British Columbia", breaks = 50)
)

for (panel in recent_imm_panels) {
  hist(
    panel$rows[[recent_imm_col]],
    breaks = panel$breaks,
    col = "blue",
    border = "white",
    xlab = "% Recent Immigrants from the Philippines (2016-2021)",
    ylab = "Number of Districts",
    main = paste0(recent_imm_title, panel$suffix)
  )
}

###########################
## CORRELATION ANALYSIS  ##
###########################

# Extract the socioeconomic variables for analysis into their own table.
# The label encoding below is applied to this copy only, so `Data` keeps its
# original province names for the rest of the script.
Data_for_CorrAnalysis <- Data[, c(
  "Province/Territory", "% Filipinos",
  "% of Population with Knowledge of French", "Median Age",
  "Healthcare Workers (per 100,000)",
  "% of Recent Immigrants from PH (2016-2021)",
  "Number of People with Bachelors Degree or Higher (per 100,000)"
)]

# Label-encode Province/Territory, a categorical variable, so cor() receives
# only numeric columns. The encoder is not called `labs`, to avoid shadowing
# the name of ggplot2::labs() used by the maps.
# NOTE(review): the integer codes are arbitrary category identifiers, so the
# correlations involving the province column carry no quantitative meaning.
province_encoder <- LabelEncoder.fit(Data_for_CorrAnalysis$`Province/Territory`)
Data_for_CorrAnalysis$`Province/Territory` <-
  transform(province_encoder, Data_for_CorrAnalysis$`Province/Territory`)

# View() needs an interactive data viewer; skip it in batch runs so the
# script does not stop here when executed with Rscript.
if (interactive()) {
  View(Data_for_CorrAnalysis)
}

# Pairwise Pearson correlations, shown as numbers. `sig.level` is omitted
# because no p-value matrix (`p.mat`) is supplied for it to act on.
M <- cor(Data_for_CorrAnalysis)
corrplot(M, method = "number")

###########################
## GEOSPATIAL ANALYSIS  ##
###########################

# Read the shapefile of electoral boundaries (2013 representation order).
my_sf <- read_sf("C:/Users/LB0009/Downloads/lfed000b21a_e")

head(my_sf)

# The join below matches on district names. Any shapefile district whose name
# has no exact counterpart in `Data` would silently get NA in every joined
# column (and an unclassified polygon on the maps), so report such names.
unmatched_districts <- setdiff(
  my_sf$FEDENAME,
  Data[["Electoral District (2013 Representation Order)"]]
)
if (length(unmatched_districts) > 0) {
  warning(
    length(unmatched_districts),
    " electoral district(s) in the shapefile have no match in Data: ",
    paste(unmatched_districts, collapse = "; "),
    call. = FALSE
  )
}

# Merge shapefile with csv file, joined through the names of the electoral
# districts.
my_sf_merged <- my_sf %>%
  left_join(Data, by = c("FEDENAME" = "Electoral District (2013 Representation Order)"))

# Map % of Filipinos by riding.
# Districts are binned into right-closed classes. The first class starts at
# -Inf, so districts at 0% fall into it as well; its label is therefore "<1%".
my_sf_merged$FilipinoPercentClass <- cut(
  my_sf_merged$`% Filipinos`,
  breaks = c(-Inf, 1, 2.5, 5, 10, 20, 50, Inf),
  labels = c("<1%", "1-2.5%", "2.5-5%", "5-10%", "10-20%", "20-50%", ">50%")
)
# One layer draws both the class fill and the white riding outlines; drawing
# the geometry a second time only to recolour the borders is unnecessary.
Map1 <- ggplot(my_sf_merged) +
  geom_sf(aes(fill = FilipinoPercentClass), color = "white") +
  scale_fill_brewer(name = "% Filipinos") +
  labs(
    title = "Proportion of Filipinos by Riding",
    caption = "Source: Statistics Canada"
  ) +
  theme_gray() +
  theme(title = element_text(face = "bold"), legend.position = "bottom")
Map1


# Map % of recent immigrants (2016-2021) coming from the Philippines.
# include.lowest = TRUE closes the first interval at 0; without it, districts
# at exactly 0% fall outside (0, 5] and are turned into NA by cut().
my_sf_merged$RecentImmClass <- cut(
  my_sf_merged$`% of Recent Immigrants from PH (2016-2021)`,
  breaks = c(0, 5, 10, 20, 50, Inf),
  labels = c("<5%", "5-10%", "10-20%", "20-50%", ">50%"),
  include.lowest = TRUE
)
# One layer draws both the class fill and the white riding outlines; drawing
# the geometry a second time only to recolour the borders is unnecessary.
Map2 <- ggplot(my_sf_merged) +
  geom_sf(aes(fill = RecentImmClass), color = "white") +
  scale_fill_brewer(name = "% Recent Immigrants from PH") +
  labs(
    title = "Proportion of Recent Immigrants (2016-2021) That Come from The Philippines",
    caption = "Source: Statistics Canada"
  ) +
  theme_gray() +
  theme(title = element_text(face = "bold"), legend.position = "bottom")
Map2


# Map Filipino growth rate (2016-2021).
# Classes are left-closed (right = FALSE) so that a district with exactly 0%
# growth is placed in "0-10%" rather than in the "<0%" (decline) class.
# Districts with an NA growth rate stay NA and are drawn with the scale's
# missing-value colour.
my_sf_merged$GrowthRateClass <- cut(
  my_sf_merged$`Filipino Growth Rate % (2016-2021)`,
  breaks = c(-Inf, 0, 10, 25, 50, 100, 200, 500, Inf),
  labels = c(
    "<0%", "0-10%", "10-25%", "25-50%", "50-100%", "100-200%", "200-500%",
    ">500%"
  ),
  right = FALSE
)
# One layer draws both the class fill and the white riding outlines; drawing
# the geometry a second time only to recolour the borders is unnecessary.
Map3 <- ggplot(my_sf_merged) +
  geom_sf(aes(fill = GrowthRateClass), color = "white") +
  scale_fill_brewer(name = "% Change, 2016-2021") +
  labs(
    title = "Growth rate of Filipino population (2016-2021)",
    caption = "Source: Statistics Canada"
  ) +
  theme_gray() +
  theme(title = element_text(face = "bold"), legend.position = "bottom")
Map3
  


# Map secondary Philippine language usage.
my_sf_merged$SecondLangClass <- my_sf_merged$`Second Most Spoken Philippine Language`

# Only one fill scale is kept. Previously a viridis scale carrying the legend
# title was immediately replaced by a bare scale_fill_discrete(), which
# discarded the title; the default discrete palette that was actually drawn is
# preserved here, now with the intended legend title. The colour scale was
# dropped because no colour aesthetic is mapped.
# One layer draws both the class fill and the white riding outlines.
Map4 <- ggplot(my_sf_merged) +
  geom_sf(aes(fill = SecondLangClass), color = "white") +
  scale_fill_discrete(name = "Secondary Language") +
  labs(
    title = "Usage of Secondary Filipino Languages",
    caption = "Source: Statistics Canada"
  ) +
  theme_void() +
  theme(title = element_text(face = "bold"), legend.position = "bottom")
Map4