Install packages

install.packages("tidyverse")
install.packages("dplyr")
install.packages("epitools") # for computing Relative Risk (RR)

Setup

library(tidyverse) # to use some data transformation functions outside of the built-in
## Warning: package 'tidyverse' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
library(dplyr ) # to use some data transformation functions outside of the built-in
library(epitools) # for calculating relative risk
## Warning: package 'epitools' was built under R version 3.5.2
Sys.setlocale(locale = "Thai") # to use Thai language and local Thai time
options(scipen = 999) # to disable auto convert number to scientific notation

Read Data

r4_ani = read.csv("R4_ani.csv", encoding = "UTF-8", stringsAsFactors = FALSE)
fmd_report = read.csv("EXPORT_EXCEL_WITH_KKR3 (esmart).csv", encoding = "UTF-8", stringsAsFactors = FALSE)
super_master_loc =  read.csv("super_master_location.csv", encoding = "UTF-8", stringsAsFactors = FALSE)

View Meta Data

dim(r4_ani)
## [1] 4613114      43
colnames(r4_ani)
##  [1] "cer_no"              "cer_issuer_name"     "cer_issuer_org"     
##  [4] "mover_name"          "disease_free_region" "src_detail"         
##  [7] "src_name"            "src_addr_code"       "src_moo"            
## [10] "src_soi"             "src_road"            "src_tambon_th"      
## [13] "src_tambon_eng"      "src_amphur_th"       "src_amphur_en"      
## [16] "src_province_th"     "src_province_en"     "dest_detail"        
## [19] "dest_name"           "dest_addr_code"      "dest_moo"           
## [22] "dest_soi"            "dest_road"           "dest_tambon_th"     
## [25] "dest_tambon_en"      "dest_amphur_th"      "dest_amphur_en"     
## [28] "dest_province_th"    "dest_province_en"    "cer_purpose"        
## [31] "ani_type"            "ani_src"             "ani_code"           
## [34] "ani_name"            "species_code"        "species_name"       
## [37] "amount"              "unit"                "vehicle_no"         
## [40] "issue_date"          "expire_date"         "is_market"          
## [43] "date_issue_date"
dim(fmd_report)
## [1] 952 106
colnames(fmd_report)
##   [1] "X.U.FEFF.Amphurecode"     "Animals.Sources"         
##   [3] "Animal"                   "Amphure"                 
##   [5] "Between.Date"             "Between.Date.All"        
##   [7] "Create.Date"              "Create.Date.Th"          
##   [9] "Culture.Area"             "Culture.Patient"         
##  [11] "Date.Found"               "Date.Found.Th"           
##  [13] "Department.Name"          "Diagnosis"               
##  [15] "Diagnosis.Name.Start"     "Died.Kcollect.Num"       
##  [17] "Environment"              "First.Patient"           
##  [19] "First.Type.Patient"       "Kkr1.Code"               
##  [21] "Kkr1.Id"                  "Lab.Date"                
##  [23] "Lab.Date.Th"              "Lab.Diagnosis"           
##  [25] "Lab.Diagnosis.Name"       "Lab.Id"                  
##  [27] "Lab.Methods"              "Lab.Name"                
##  [29] "Lab.Results"              "Latitude"                
##  [31] "Longitude"                "Manage"                  
##  [33] "Month"                    "MOO"                     
##  [35] "Move.Date"                "Move.Date.Th"            
##  [37] "Move.Feed.Date"           "Move.Feed.Date.Th"       
##  [39] "Move.Other"               "Move.Other.Date"         
##  [41] "Move.Other.Date.Th"       "Move.People.Date"        
##  [43] "Move.People.Date.Th"      "Move.Product.Date"       
##  [45] "Move.Product.Date.Th"     "Move.Type1"              
##  [47] "Move.Type2"               "Move.Vehicles.Date"      
##  [49] "Move.Vehicles.Date.Th"    "Number.of.Records"       
##  [51] "Opt.Animal.Type"          "Opt.Area.Total"          
##  [53] "Opt.End.Date"             "Opt.End.Date.Th"         
##  [55] "Opt.Start.Date"           "Opt.Start.Date.Th"       
##  [57] "Opt.Vaccinate.Total"      "Owner"                   
##  [59] "Patient.Balance.Num"      "Patient.Be.Cured.Num"    
##  [61] "Patient.Collect.Num"      "Patient.First.Num"       
##  [63] "Patient.Week.Num"         "Pet.Trade"               
##  [65] "Provincecode"             "Publish.Area"            
##  [67] "Publish.Date"             "Publish.Date.Th"         
##  [69] "Publish.Diagnosis"        "Publish.Diagnosis.Name"  
##  [71] "Publish.Enddate"          "Publish.Enddate.Th"      
##  [73] "Publish.Type"             "Publish.Type.Name"       
##  [75] "Province"                 "Recent"                  
##  [77] "Recent.Area"              "Report"                  
##  [79] "Report.Date"              "Report.Date.Th"          
##  [81] "Risk.Num"                 "Slaughter.House"         
##  [83] "Ststus.Point"             "Symptom"                 
##  [85] "Tambolcode"               "TambonCode"              
##  [87] "Tambol"                   "Vaccination"             
##  [89] "Vaccination.Area"         "Vaccination.Area.Date"   
##  [91] "Vaccination.Area.Date.Th" "Vaccination.Date"        
##  [93] "Vaccination.Date.Th"      "Water"                   
##  [95] "Week"                     "W"                       
##  [97] "Year"                     "Zone.Code"               
##  [99] "Collect"                  "Died"                    
## [101] "Patient"                  "Risk"                    
## [103] "sDIED.NUM"                "sGROUP.NUM"              
## [105] "sPATIENT.NUM"             "date_report_date"

Create Column is_market

market_vec = lapply(r4_ani$src_name, function(x) {grepl("^ตลาด", x)})
market_vec = unlist(market_vec)
summary(market_vec)
##    Mode   FALSE    TRUE 
## logical 4419477  193637
r4_ani['is_market'] = market_vec

Format Data

Sys.setlocale(locale = "English") # date data was stored in English, so we have to temporary set the locale to English in order to convert type of the 'date'
## [1] "LC_COLLATE=English_United States.1252;LC_CTYPE=English_United States.1252;LC_MONETARY=English_United States.1252;LC_NUMERIC=C;LC_TIME=English_United States.1252"
fmd_report_date = as.Date(fmd_report$Report.Date, format="%m/%d/%Y")
r4_ani_issue_date = as.Date(r4_ani$issue_date, format="%d-%h-%y")
Sys.setlocale(locale = "Thai")
## [1] "LC_COLLATE=Thai_Thailand.874;LC_CTYPE=Thai_Thailand.874;LC_MONETARY=Thai_Thailand.874;LC_NUMERIC=C;LC_TIME=Thai_Thailand.874"
r4_ani['date_issue_date'] = r4_ani_issue_date
fmd_report['date_report_date'] = fmd_report_date

Check for duplicates and empties (and remove if have any)

  1. check master location’s tambon codes
anyDuplicated(super_master_loc$X.U.FEFF.MS_Sub.District.Code)
## [1] 0
  1. check r4 ani’s tambon (referred by tambon, amphur, province in thai)
# check duplicates
ind_dup_r4 = which(duplicated(r4_ani[,c('dest_tambon_th', "dest_amphur_th", "dest_province_th")]))
length(ind_dup_r4)
## [1] 4605924
  1. check r4 for the empty tambon
r4_ani[ind_dup_r4[1:3], c('dest_tambon_th', "dest_amphur_th", "dest_province_th")]
##    dest_tambon_th dest_amphur_th dest_province_th
## 3          แสนแสบ        มีนบุรี    กรุงเทพมหานคร
## 6       ท่าเยี่ยม         โชคชัย       นครราชสีมา
## 10      ท่าเยี่ยม         โชคชัย       นครราชสีมา
empty_tambon_r4 = which(r4_ani$dest_tambon_th == '')
length(empty_tambon_r4)
## [1] 4268
empty_amphur_r4 = which(r4_ani$dest_amphur_th == '')
length(empty_amphur_r4)
## [1] 1
r4_ani[empty_amphur_r4[1:3],]
##                 cer_no       cer_issuer_name
## 1274912 45840708000008 นายทนงศักดิ์  ขะพินิจ
## NA                  NA                  <NA>
## NA.1                NA                  <NA>
##                         cer_issuer_org         mover_name
## 1274912 สำนักงานปศุสัตว์อำเภอวานรนิวาส วิเชียร  ราชรินทร์
## NA                                <NA>               <NA>
## NA.1                              <NA>               <NA>
##         disease_free_region src_detail     src_name src_addr_code src_moo
## 1274912                          อื่นๆ ฟาร์มประยงค์           188       5
## NA                     <NA>       <NA>         <NA>          <NA>    <NA>
## NA.1                   <NA>       <NA>         <NA>          <NA>    <NA>
##         src_soi src_road src_tambon_th src_tambon_eng src_amphur_th
## 1274912                                                            
## NA         <NA>     <NA>          <NA>           <NA>          <NA>
## NA.1       <NA>     <NA>          <NA>           <NA>          <NA>
##         src_amphur_en src_province_th src_province_en dest_detail
## 1274912                                                     อื่นๆ
## NA               <NA>            <NA>            <NA>        <NA>
## NA.1             <NA>            <NA>            <NA>        <NA>
##             dest_name dest_addr_code dest_moo dest_soi dest_road
## 1274912 P0328028/2550                       3                   
## NA               <NA>           <NA>     <NA>     <NA>      <NA>
## NA.1             <NA>           <NA>     <NA>     <NA>      <NA>
##         dest_tambon_th dest_tambon_en dest_amphur_th dest_amphur_en
## 1274912                                                            
## NA                <NA>           <NA>           <NA>           <NA>
## NA.1              <NA>           <NA>           <NA>           <NA>
##         dest_province_th dest_province_en cer_purpose  ani_type ani_src
## 1274912                                    เข้าโรงฆ่า สัตว์เล็ก        
## NA                  <NA>             <NA>        <NA>      <NA>    <NA>
## NA.1                <NA>             <NA>        <NA>      <NA>    <NA>
##         ani_code ani_name species_code species_name amount unit
## 1274912      201     สุกร           02      สุกรขุน     20  ตัว
## NA            NA     <NA>         <NA>         <NA>     NA <NA>
## NA.1          NA     <NA>         <NA>         <NA>     NA <NA>
##               vehicle_no issue_date expire_date is_market date_issue_date
## 1274912 บจ 7031 มุกดาหาร  03-JUL-15   03-JUL-15     FALSE      2015-07-03
## NA                  <NA>       <NA>        <NA>        NA            <NA>
## NA.1                <NA>       <NA>        <NA>        NA            <NA>

Filter out r4 that has empty tambons

r4_ani_filtered = filter(r4_ani, dest_tambon_th != '' & dest_amphur_th != '' & dest_province_th != '' )
dim(r4_ani_filtered) #4608846      43
## [1] 4608846      43
dim(r4_ani)# 4613114      43
## [1] 4613114      43

Get the tambon code for each r4 row

lj_r4_ani_filtered =  left_join(r4_ani_filtered, super_master_loc, by = c("dest_tambon_th" = "EMV_Tumbon.Th_emove", "dest_amphur_th" = "EMV_Amphur.Th_emove", "dest_province_th" = "EMV_Province.Th_emove"))
dim(lj_r4_ani_filtered)#4608846      51
## [1] 4608846      51
dim(r4_ani_filtered)
## [1] 4608846      43

Find out if each move (r4) caused the FMD outbreak (within the 30 days of moving)

result = list()
tambon_codes = list()
for(i in 1:dim(lj_r4_ani_filtered)[1]){
  issue_date = lj_r4_ani_filtered[i, 'date_issue_date']
  tambon_dest =  lj_r4_ani_filtered[i, 'X.U.FEFF.MS_Sub.District.Code']
  if (!is.na(tambon_dest)) {
    matched = (fmd_report_date - issue_date <=  30 & fmd_report_date - issue_date >= 0) & (fmd_report$TambonCode == tambon_dest)
    w = which(matched)
    if (length(w) > 0) {
      result[[i]] = w
    }
  }
}
length(result)
## [1] 4608791
dim(lj_r4_ani_filtered)[1]
## [1] 4608846
# unlist result to see the number of fmd outbreaks that matched
unlisted_result = unlist(result)
length(unlisted_result)
## [1] 13879
# see the number of moves that matched
ind_matched_date_tambon = c()
for(i in 1:length(result)){
  if(!is.null(result[[i]])) {
    ind_matched_date_tambon = c(ind_matched_date_tambon, i)
  }
}
length(ind_matched_date_tambon) #13375
## [1] 13375

View how many outbreaks that one move matched

max_length = 0
length_result = c()
for(i in ind_matched_date_tambon) {
  length_result = c(length_result, length(result[[i]]))
  if(length(result[[i]]) > max_length) {
    max_length = length(result[[i]])
  }
}
max_length #5
## [1] 5
length(length_result) #13375
## [1] 13375

Check if matching was done correctly

summary(as.factor(length_result))
##     1     2     3     4     5 
## 13012   255    77    29     2
index_first_result_that_matched_two =  which(length_result == 2)[1] #194
index_first_result_that_matched_two
## [1] 194
index_lj_that_matched =  ind_matched_date_tambon[index_first_result_that_matched_two] # 307221
index_lj_that_matched # preview
## [1] 307221
lj_r4_ani_filtered[index_lj_that_matched, c('X.U.FEFF.MS_Sub.District.Code', 'date_issue_date')]
##        X.U.FEFF.MS_Sub.District.Code date_issue_date
## 307221                        760408      2014-05-21
fmd_indices_that_matched =  result[[index_lj_that_matched]] #38 50
dim(fmd_report)
## [1] 952 106
fmd_report[fmd_indices_that_matched, c("TambonCode", "date_report_date")]
##    TambonCode date_report_date
## 38     760408       2014-06-12
## 50     760408       2014-06-20

Summary

# explore
lj_r4_ani_filtered['matched_date_tambon'] = 1:dim(lj_r4_ani_filtered)[1] %in% ind_matched_date_tambon
summary(lj_r4_ani_filtered['matched_date_tambon']) #FALSE:4595471  TRUE :13375  
##  matched_date_tambon
##  Mode :logical      
##  FALSE:4595471      
##  TRUE :13375
lj_r4_ani_filtered[ind_matched_date_tambon[1],]
##                cer_no          cer_issuer_name
## 118536 45750214001770 นายศักดิ์ชาย  บุณยราศรัย
##                      cer_issuer_org   mover_name disease_free_region
## 118536 สำนักงานปศุสัตว์อำเภอสันทราย สุบัน  ผาเทพ                    
##        src_detail       src_name src_addr_code src_moo src_soi src_road
## 118536      อื่นๆ ฟาร์ม ม.แม่โจ้            63       4                 
##        src_tambon_th src_tambon_eng src_amphur_th src_amphur_en
## 118536       หนองหาร       Nonghan\t       สันทราย      San Sai\t
##        src_province_th src_province_en dest_detail   dest_name
## 118536       เชียงใหม่      Chiang Mai       อื่นๆ ฟาร์ม วิลาศ
##        dest_addr_code dest_moo dest_soi dest_road dest_tambon_th
## 118536          132/1        2                          ทาปลาดุก
##        dest_tambon_en dest_amphur_th dest_amphur_en dest_province_th
## 118536     ThaPladuk\t          แม่ทา        Mae Tha            ลำพูน
##        dest_province_en cer_purpose  ani_type       ani_src ani_code
## 118536          Lamphun ไปเลี้ยงขุน สัตว์เล็ก ในพื้นที่เดิม      201
##        ani_name species_code  species_name amount unit vehicle_no
## 118536     สุกร           05 ลูกสุกรพันธุ์    120  ตัว 70-1100 ลพ
##        issue_date expire_date is_market date_issue_date
## 118536  25-FEB-14   25-FEB-14     FALSE      2014-02-25
##        X.U.FEFF.MS_Sub.District.Code MS_Sub.District.Name.EN
## 118536                        510201             THA PLA DUK
##        MS_Province.Name.TH MS_District.Name.TH MS_Sub.District.Name.TH
## 118536               ลำพูน               แม่ทา                ทาปลาดุก
##        Ani_Farm.Province Ani_Farm.Amphur Ani_Farm.Tumbon
## 118536             ลำพูน           แม่ทา        ทาปลาดุก
##        matched_date_tambon
## 118536                TRUE
# create summary table (2x2)
count_disease_table = table(lj_r4_ani_filtered$is_market, lj_r4_ani_filtered$matched_date_tambon)
colnames(count_disease_table) = c("FMD-", "FMD+")
rownames(count_disease_table) = c("src_isnt_market", "src_is_market")
count_disease_table
##                  
##                      FMD-    FMD+
##   src_isnt_market 4402574   12635
##   src_is_market    192897     740
write.csv(lj_r4_ani_filtered, "lj_r4_ani_filtered.csv", row.names = FALSE)

Relative Risk (RR)

epitab(count_disease_table,method="riskratio")
## $tab
##                  
##                      FMD-        p0  FMD+          p1 riskratio    lower
##   src_isnt_market 4402574 0.9971383 12635 0.002861699  1.000000       NA
##   src_is_market    192897 0.9961784   740 0.003821584  1.335425 1.240184
##                  
##                     upper               p.value
##   src_isnt_market      NA                    NA
##   src_is_market   1.43798 0.0000000000001989906
## 
## $measure
## [1] "wald"
## 
## $conf.level
## [1] 0.95
## 
## $pvalue
## [1] "fisher.exact"