「2014 FIFA ワールドカップ Brasil」を散布図で分析

「Copa do Mundo de Futebol FIFA Brasil 2014」を散布図で分析してみた。

データは、ここから入手。
https://www.whoscored.com/Statistics
こんな風な tab 区切りファイルに保存して

Team	Rank	Rating	Shotsconceded 	Shots	Tackles	CaughtOffside	Blocks	Interception	Clearances	Save	Goals	Dribbles	PossessionLoss	AerialWon	AerialLost	Passes	KeyPpasses	Assists	Fouls	Fouled
Algeria	10	6.92	15.5	9	20.8	1.5	10.8	16.3	29.5	4.8	1.5	6.5	19.8	14.8	22	326	6.5	1.3	17.3	13.5
Argentina	4	7.12	11.1	15.4	19	1.4	12.5	14.1	26.7	2.4	0.9	12.3	24	10.4	9.7	461.3	10.4	0.3	11	16.6
Australia	30	6.39	11.3	9	12.3	1	12	15.3	21	2.7	1	10.3	24.4	12.7	10.7	401.7	7.3	0.7	16.7	13
...省略...
Switzerland	1	7.14	18.3	16.3	20	1.3	15.8	12	24.5	4.3	1.8	6.5	23.6	11	10.3	392	13	1.5	16.5	15.8
Uruguay	26	6.63	10.8	11.8	18.5	2.8	12.3	15.8	24.3	2.3	1	5.5	23.3	18.3	17.5	346	7.8	0.5	18.3	15.8
USA	11	6.91	23.5	11	20	1	17.1	13.5	38.5	5.8	1	9	17.5	15	16.3	385.3	5.8	0.8	12.3	14

R に読み込み

d <- read.table("WorldCup2014TeamStatistics.txt", header=T)

rownames(d) <- c("Algeria",
"Argentina",
"Australia",
"Belgium",
"Bosnia_and_Herzegovina",
"Brazil",
"Cameroon",
"Chile",
"Colombia",
"Costa_Rica",
"Croatia",
"Ecuador",
"England",
"France",
"Germany",
"Ghana",
"Greece",
"Honduras",
"Iran",
"Italy",
"Ivory_Coast",
"Japan",
"Mexico",
"Netherlands",
"Nigeria",
"Portugal",
"Russia",
"South_Korea",
"Spain",
"Switzerland",
"Uruguay",
"USA")

正規化

# 平均が0、分散が1となるよう正規化
d_scale <- scale(d[,4:21])

パス成功率とポゼッション

plot(d_scale[,18], d_scale[,14], t='n', main="パス成功率とポゼッション", xlab="パス成功率", ylab="ポゼッション")
text(d_scale[,18], d_scale[,14], labels=rownames(d_scale),col=(d_merge$cluster + 1))

f:id:fornext1119:20180703123443p:plain

パスとドリブル

plot(d_scale[,19], d_scale[,13], t='n', main="パスとドリブル", xlab="パス", ylab="ドリブル")
text(d_scale[,19], d_scale[,13], labels=rownames(d_scale),col=(d_merge$cluster + 1))

f:id:fornext1119:20180703123523p:plain

シュートと被シュート

plot(d_scale[,5], d_scale[,4], t='n', main="シュートと被シュート", xlab="シュート", ylab="被シュート")
text(d_scale[,5], d_scale[,4], labels=rownames(d_scale),col=(d_merge$cluster + 1))

f:id:fornext1119:20180703123653p:plain

ポゼッションと得点

plot(d_scale[,14], d_scale[,12], t='n', main="ポゼッションと得点", xlab="ポゼッション", ylab="得点")
text(d_scale[,14], d_scale[,12], labels=rownames(d_scale),col=(d_merge$cluster + 1))