- Analyzed by Thanatcha Khunkhet - Smart Data Center of The Department of Livestock Development (SDC DLD)
- Date of analysis: 3 - 8 January, 2019
- Data used: Domestic animal movement data (from e-movement), FMD outbreaks data (from e-smart surveillance)
Install packages
- only have to do if your r doesn’t have these packages
install.packages("tidyverse")
install.packages("dplyr")
install.packages("epitools") # for computing Relative Risk (RR)
Setup
library(tidyverse) # to use some data transformation functions outside of the built-in
## Warning: package 'tidyverse' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
library(dplyr ) # to use some data transformation functions outside of the built-in
library(epitools) # for calculating relative risk
## Warning: package 'epitools' was built under R version 3.5.2
Sys.setlocale(locale = "Thai") # to use Thai language and local Thai time
options(scipen = 999) # to disable auto convert number to scientific notation
Read Data
R4_ani.csv
= ร.4
EXPORT_EXCEL_WITH_KKR3 (esmart).csv
= esmart surveillance
super_master_location.csv
= master amphur tambon province
r4_ani = read.csv("R4_ani.csv", encoding = "UTF-8", stringsAsFactors = FALSE)
fmd_report = read.csv("EXPORT_EXCEL_WITH_KKR3 (esmart).csv", encoding = "UTF-8", stringsAsFactors = FALSE)
super_master_loc = read.csv("super_master_location.csv", encoding = "UTF-8", stringsAsFactors = FALSE)
Create Column is_market
market_vec = lapply(r4_ani$src_name, function(x) {grepl("^ตลาด", x)})
market_vec = unlist(market_vec)
summary(market_vec)
## Mode FALSE TRUE
## logical 4419477 193637
r4_ani['is_market'] = market_vec
Check for duplicates and empties (and remove if have any)
- check master location’s tambon codes
anyDuplicated(super_master_loc$X.U.FEFF.MS_Sub.District.Code)
## [1] 0
- check r4 ani’s tambon (referred by tambon, amphur, province in thai)
# check duplicates
ind_dup_r4 = which(duplicated(r4_ani[,c('dest_tambon_th', "dest_amphur_th", "dest_province_th")]))
length(ind_dup_r4)
## [1] 4605924
- check r4 for the empty tambon
r4_ani[ind_dup_r4[1:3], c('dest_tambon_th', "dest_amphur_th", "dest_province_th")]
## dest_tambon_th dest_amphur_th dest_province_th
## 3 แสนแสบ มีนบุรี กรุงเทพมหานคร
## 6 ท่าเยี่ยม โชคชัย นครราชสีมา
## 10 ท่าเยี่ยม โชคชัย นครราชสีมา
empty_tambon_r4 = which(r4_ani$dest_tambon_th == '')
length(empty_tambon_r4)
## [1] 4268
empty_amphur_r4 = which(r4_ani$dest_amphur_th == '')
length(empty_amphur_r4)
## [1] 1
r4_ani[empty_amphur_r4[1:3],]
## cer_no cer_issuer_name
## 1274912 45840708000008 นายทนงศักดิ์ ขะพินิจ
## NA NA <NA>
## NA.1 NA <NA>
## cer_issuer_org mover_name
## 1274912 สำนักงานปศุสัตว์อำเภอวานรนิวาส วิเชียร ราชรินทร์
## NA <NA> <NA>
## NA.1 <NA> <NA>
## disease_free_region src_detail src_name src_addr_code src_moo
## 1274912 อื่นๆ ฟาร์มประยงค์ 188 5
## NA <NA> <NA> <NA> <NA> <NA>
## NA.1 <NA> <NA> <NA> <NA> <NA>
## src_soi src_road src_tambon_th src_tambon_eng src_amphur_th
## 1274912
## NA <NA> <NA> <NA> <NA> <NA>
## NA.1 <NA> <NA> <NA> <NA> <NA>
## src_amphur_en src_province_th src_province_en dest_detail
## 1274912 อื่นๆ
## NA <NA> <NA> <NA> <NA>
## NA.1 <NA> <NA> <NA> <NA>
## dest_name dest_addr_code dest_moo dest_soi dest_road
## 1274912 P0328028/2550 3
## NA <NA> <NA> <NA> <NA> <NA>
## NA.1 <NA> <NA> <NA> <NA> <NA>
## dest_tambon_th dest_tambon_en dest_amphur_th dest_amphur_en
## 1274912
## NA <NA> <NA> <NA> <NA>
## NA.1 <NA> <NA> <NA> <NA>
## dest_province_th dest_province_en cer_purpose ani_type ani_src
## 1274912 เข้าโรงฆ่า สัตว์เล็ก
## NA <NA> <NA> <NA> <NA> <NA>
## NA.1 <NA> <NA> <NA> <NA> <NA>
## ani_code ani_name species_code species_name amount unit
## 1274912 201 สุกร 02 สุกรขุน 20 ตัว
## NA NA <NA> <NA> <NA> NA <NA>
## NA.1 NA <NA> <NA> <NA> NA <NA>
## vehicle_no issue_date expire_date is_market date_issue_date
## 1274912 บจ 7031 มุกดาหาร 03-JUL-15 03-JUL-15 FALSE 2015-07-03
## NA <NA> <NA> <NA> NA <NA>
## NA.1 <NA> <NA> <NA> NA <NA>
Filter out r4 that has empty tambons
- store the result in
r4_ani_filtered
variable
r4_ani_filtered = filter(r4_ani, dest_tambon_th != '' & dest_amphur_th != '' & dest_province_th != '' )
dim(r4_ani_filtered) #4608846 43
## [1] 4608846 43
dim(r4_ani)# 4613114 43
## [1] 4613114 43
Get the tambon code for each r4 row
- This is done by joining the filtered r4 with the master location where the amphur tambon and province are equal.
- Noted that master location was prepared prior to this analysis. The master location can be used to convert emove location to the master location.
lj_r4_ani_filtered = left_join(r4_ani_filtered, super_master_loc, by = c("dest_tambon_th" = "EMV_Tumbon.Th_emove", "dest_amphur_th" = "EMV_Amphur.Th_emove", "dest_province_th" = "EMV_Province.Th_emove"))
- The number of rows of the leftjoin result should be the same as the datatable used in the left part of the join (i.e. r4_ani_filtered)
dim(lj_r4_ani_filtered)#4608846 51
## [1] 4608846 51
dim(r4_ani_filtered)
## [1] 4608846 43
Find out if each move (r4) caused the FMD outbreak (within the 30 days of moving)
result = list()
tambon_codes = list()
for(i in 1:dim(lj_r4_ani_filtered)[1]){
issue_date = lj_r4_ani_filtered[i, 'date_issue_date']
tambon_dest = lj_r4_ani_filtered[i, 'X.U.FEFF.MS_Sub.District.Code']
if (!is.na(tambon_dest)) {
matched = (fmd_report_date - issue_date <= 30 & fmd_report_date - issue_date >= 0) & (fmd_report$TambonCode == tambon_dest)
w = which(matched)
if (length(w) > 0) {
result[[i]] = w
}
}
}
length(result)
## [1] 4608791
dim(lj_r4_ani_filtered)[1]
## [1] 4608846
# unlist result to see the number of fmd outbreaks that matched
unlisted_result = unlist(result)
length(unlisted_result)
## [1] 13879
# see the number of moves that matched
ind_matched_date_tambon = c()
for(i in 1:length(result)){
if(!is.null(result[[i]])) {
ind_matched_date_tambon = c(ind_matched_date_tambon, i)
}
}
length(ind_matched_date_tambon) #13375
## [1] 13375
View how many outbreaks that one move matched
max_length = 0
length_result = c()
for(i in ind_matched_date_tambon) {
length_result = c(length_result, length(result[[i]]))
if(length(result[[i]]) > max_length) {
max_length = length(result[[i]])
}
}
max_length #5
## [1] 5
length(length_result) #13375
## [1] 13375
Check if matching was done correctly
summary(as.factor(length_result))
## 1 2 3 4 5
## 13012 255 77 29 2
index_first_result_that_matched_two = which(length_result == 2)[1] #194
index_first_result_that_matched_two
## [1] 194
index_lj_that_matched = ind_matched_date_tambon[index_first_result_that_matched_two] # 307221
index_lj_that_matched # preview
## [1] 307221
lj_r4_ani_filtered[index_lj_that_matched, c('X.U.FEFF.MS_Sub.District.Code', 'date_issue_date')]
## X.U.FEFF.MS_Sub.District.Code date_issue_date
## 307221 760408 2014-05-21
fmd_indices_that_matched = result[[index_lj_that_matched]] #38 50
dim(fmd_report)
## [1] 952 106
fmd_report[fmd_indices_that_matched, c("TambonCode", "date_report_date")]
## TambonCode date_report_date
## 38 760408 2014-06-12
## 50 760408 2014-06-20
Summary
# explore
lj_r4_ani_filtered['matched_date_tambon'] = 1:dim(lj_r4_ani_filtered)[1] %in% ind_matched_date_tambon
summary(lj_r4_ani_filtered['matched_date_tambon']) #FALSE:4595471 TRUE :13375
## matched_date_tambon
## Mode :logical
## FALSE:4595471
## TRUE :13375
lj_r4_ani_filtered[ind_matched_date_tambon[1],]
## cer_no cer_issuer_name
## 118536 45750214001770 นายศักดิ์ชาย บุณยราศรัย
## cer_issuer_org mover_name disease_free_region
## 118536 สำนักงานปศุสัตว์อำเภอสันทราย สุบัน ผาเทพ
## src_detail src_name src_addr_code src_moo src_soi src_road
## 118536 อื่นๆ ฟาร์ม ม.แม่โจ้ 63 4
## src_tambon_th src_tambon_eng src_amphur_th src_amphur_en
## 118536 หนองหาร Nonghan\t สันทราย San Sai\t
## src_province_th src_province_en dest_detail dest_name
## 118536 เชียงใหม่ Chiang Mai อื่นๆ ฟาร์ม วิลาศ
## dest_addr_code dest_moo dest_soi dest_road dest_tambon_th
## 118536 132/1 2 ทาปลาดุก
## dest_tambon_en dest_amphur_th dest_amphur_en dest_province_th
## 118536 ThaPladuk\t แม่ทา Mae Tha ลำพูน
## dest_province_en cer_purpose ani_type ani_src ani_code
## 118536 Lamphun ไปเลี้ยงขุน สัตว์เล็ก ในพื้นที่เดิม 201
## ani_name species_code species_name amount unit vehicle_no
## 118536 สุกร 05 ลูกสุกรพันธุ์ 120 ตัว 70-1100 ลพ
## issue_date expire_date is_market date_issue_date
## 118536 25-FEB-14 25-FEB-14 FALSE 2014-02-25
## X.U.FEFF.MS_Sub.District.Code MS_Sub.District.Name.EN
## 118536 510201 THA PLA DUK
## MS_Province.Name.TH MS_District.Name.TH MS_Sub.District.Name.TH
## 118536 ลำพูน แม่ทา ทาปลาดุก
## Ani_Farm.Province Ani_Farm.Amphur Ani_Farm.Tumbon
## 118536 ลำพูน แม่ทา ทาปลาดุก
## matched_date_tambon
## 118536 TRUE
# create summary table (2x2)
count_disease_table = table(lj_r4_ani_filtered$is_market, lj_r4_ani_filtered$matched_date_tambon)
colnames(count_disease_table) = c("FMD-", "FMD+")
rownames(count_disease_table) = c("src_isnt_market", "src_is_market")
count_disease_table
##
## FMD- FMD+
## src_isnt_market 4402574 12635
## src_is_market 192897 740
write.csv(lj_r4_ani_filtered, "lj_r4_ani_filtered.csv", row.names = FALSE)
Relative Risk (RR)
- use
epitab
R package to calculate the RR value
epitab(count_disease_table,method="riskratio")
## $tab
##
## FMD- p0 FMD+ p1 riskratio lower
## src_isnt_market 4402574 0.9971383 12635 0.002861699 1.000000 NA
## src_is_market 192897 0.9961784 740 0.003821584 1.335425 1.240184
##
## upper p.value
## src_isnt_market NA NA
## src_is_market 1.43798 0.0000000000001989906
##
## $measure
## [1] "wald"
##
## $conf.level
## [1] 0.95
##
## $pvalue
## [1] "fisher.exact"