ritish369 committed on
Commit
a5c63c2
·
1 Parent(s): eb0f487

Some more files

Browse files
Files changed (1) hide show
  1. helper.py +211 -0
helper.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
def medal_tally(df):
    """Return the per-region medal table, sorted by gold medals (descending).

    Rows that share the same Team/NOC/Games/Year/City/Sport/Event/Medal are
    collapsed first, so a medal that appears on several rows of the same
    team and event is counted once.

    Args:
        df: DataFrame with the de-duplication columns listed above plus
            "region", "Gold", "Silver" and "Bronze".
            NOTE(review): Gold/Silver/Bronze are presumably 0/1 indicator
            columns built by the caller -- confirm.

    Returns:
        DataFrame with columns region, Gold, Silver, Bronze, total, where the
        four medal columns are integers.
    """
    medal_cols = ["Gold", "Silver", "Bronze"]

    deduped = df.drop_duplicates(
        subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"]
    )

    # Select the medal columns *before* summing so unrelated (text) columns
    # are never aggregated.
    tally = (
        deduped.groupby("region")[medal_cols]
        .sum()
        .sort_values("Gold", ascending=False)
        .reset_index()
    )

    tally["total"] = tally["Gold"] + tally["Silver"] + tally["Bronze"]

    # astype returns a new object; the result has to be kept for the cast
    # to take effect.
    return tally.astype({name: "int" for name in medal_cols + ["total"]})
26
+
27
+
28
def country_year_list(df):
    """Return the year and country choices, each list led by "Overall".

    Args:
        df: DataFrame with "Year" and "region" columns.

    Returns:
        A ``(years, country)`` tuple of lists. Each list starts with the
        string "Overall", followed by the distinct values in ascending order;
        missing regions are left out.
    """
    year_options = sorted(df["Year"].unique().tolist())

    known_regions = df["region"].dropna().values
    country_options = sorted(np.unique(known_regions).tolist())

    return ["Overall"] + year_options, ["Overall"] + country_options
38
+
39
+
40
+ # For plotting a line plot in the app
41
+
42
+
43
+ # def participating_nations_over_time(df):
44
def data_over_time(df, col):
    """Count the distinct values of ``col`` seen in each year.

    Args:
        df: DataFrame with a "Year" column and the column named by ``col``.
        col: Name of the column whose distinct values are counted per year.

    Returns:
        DataFrame with columns "Year/Edition" and ``col`` (the count), sorted
        by year in ascending order.
    """
    # Grouping and sizing explicitly avoids relying on the column names that
    # value_counts().reset_index() produces, which differ between pandas 1.x
    # ("index"/"Year") and pandas 2.x ("Year"/"count").
    return (
        df.drop_duplicates(["Year", col])
        .groupby("Year")  # groupby sorts the years ascending by default
        .size()
        .reset_index(name=col)
        .rename(columns={"Year": "Year/Edition"})
    )
55
+
56
+
57
+ # Table of the most decorated/successful athletes i.e., athletes with most medal wins
58
def most_successful(df, sport):
    """Return the 15 names with the most medal rows, optionally for one sport.

    Args:
        df: DataFrame with "Name", "Sport", "region" and "Medal" columns.
        sport: A value of the "Sport" column, or "Overall" for no filter.

    Returns:
        DataFrame with columns Name, Medals, Sport, region, ordered by medal
        count (descending). Sport and region come from the first row of
        ``df`` that carries that name.
    """
    # Many rows have no medal at all; only medal rows are counted.
    medalists = df.dropna(subset=["Medal"])
    if sport != "Overall":
        medalists = medalists[medalists["Sport"] == sport]

    # Name the columns explicitly instead of depending on the "count" label
    # that value_counts() only produces on pandas >= 2.0.
    top = (
        medalists["Name"]
        .value_counts()
        .head(15)
        .rename_axis("Name")
        .reset_index(name="Medals")
    )

    # One profile row per name, so the merge cannot fan out into every row
    # the athlete has in the full frame.
    profile = df[["Name", "Sport", "region"]].drop_duplicates("Name")
    return top.merge(profile, on="Name", how="left")
76
+
77
+
78
+ # Build the medal tally for the year and country that the user selects in the
79
+ # app's dropdown boxes; the app displays the returned table.
80
+
81
+
82
def fetch_medal_tally(df, year, country):
    """Return the medal table for a year / country selection.

    Rows sharing the same Team/NOC/Games/Year/City/Sport/Event/Medal are
    collapsed first so such a medal is counted once.

    Args:
        df: DataFrame with the de-duplication columns above plus "region",
            "Gold", "Silver" and "Bronze".
        year: "Overall" for every year, otherwise a value accepted by
            ``int()`` that is compared against the "Year" column.
        country: "Overall" for every region, otherwise a "region" value.

    Returns:
        DataFrame with integer Gold, Silver, Bronze and total columns. When
        one country is selected across all years the rows are keyed by
        "Year" (ascending); in every other case they are keyed by "region"
        and sorted by Gold (descending).
    """
    medal_cols = ["Gold", "Silver", "Bronze"]

    selected = df.drop_duplicates(
        subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"]
    )

    all_years = year == "Overall"
    all_countries = country == "Overall"

    # The two filters are independent, so apply each one only when requested.
    if not all_years:
        selected = selected[selected["Year"] == int(year)]
    if not all_countries:
        selected = selected[selected["region"] == country]

    if all_years and not all_countries:
        # A single country over all years: show its year-wise tally.
        x = selected.groupby("Year")[medal_cols].sum().sort_index().reset_index()
    else:
        x = (
            selected.groupby("region")[medal_cols]
            .sum()
            .sort_values("Gold", ascending=False)
            .reset_index()
        )

    x["total"] = x["Gold"] + x["Silver"] + x["Bronze"]

    # astype returns a new object; keep the result so the cast takes effect.
    return x.astype({name: "int" for name in medal_cols + ["total"]})
123
+
124
+
125
def yearwise_medal_tally(df, country):
    """Count one country's medals per year.

    Args:
        df: DataFrame with "Medal", "region", "Year" and the de-duplication
            columns used below.
        country: Value of the "region" column to keep.

    Returns:
        DataFrame with columns "Year" and "Medal" (the number of medal rows
        for that year after de-duplication), ordered by year.
    """
    medals = (
        # Pass the subset as a list: consistent with the rest of the file and
        # accepted by pandas versions that reject a bare label here.
        df.dropna(subset=["Medal"])
        # Solving the team sports problem: identical team/event/medal rows
        # collapse to one. Chained rather than in-place so no intermediate
        # frame is mutated.
        .drop_duplicates(
            subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"]
        )
    )

    country_medals = medals[medals["region"] == country]
    return country_medals.groupby("Year")["Medal"].count().reset_index()
137
+
138
+
139
def country_event_heatmap(df, country):
    """Build a Sport x Year table of one country's medal counts.

    Args:
        df: DataFrame with "Medal", "region", "Sport", "Year" and the
            de-duplication columns used below.
        country: Value of the "region" column to keep.

    Returns:
        Pivot table indexed by "Sport" with one column per "Year"; each cell
        is the number of medal rows, with 0 where the country has none.
    """
    medals = (
        # Pass the subset as a list: consistent with the rest of the file and
        # accepted by pandas versions that reject a bare label here.
        df.dropna(subset=["Medal"])
        # Solving the team sports problem: identical team/event/medal rows
        # collapse to one. Chained rather than in-place so no intermediate
        # frame is mutated.
        .drop_duplicates(
            subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"]
        )
    )

    country_medals = medals[medals["region"] == country]

    return country_medals.pivot_table(
        index="Sport", columns="Year", values="Medal", aggfunc="count"
    ).fillna(0)
154
+
155
+
156
def most_successful_countrywise(df, country):
    """Return the 10 names with the most medal rows for one country.

    Args:
        df: DataFrame with "Name", "Sport", "region" and "Medal" columns.
        country: Value of the "region" column to keep.

    Returns:
        DataFrame with columns Name, Medals, Sport, ordered by medal count
        (descending). Sport comes from the first row of ``df`` that carries
        that name.
    """
    # Many rows have no medal at all; only medal rows are counted.
    medalists = df.dropna(subset=["Medal"])
    medalists = medalists[medalists["region"] == country]

    # Name the columns explicitly instead of depending on the "count" label
    # that value_counts() only produces on pandas >= 2.0.
    top = (
        medalists["Name"]
        .value_counts()
        .head(10)
        .rename_axis("Name")
        .reset_index(name="Medals")
    )

    # One profile row per name, so the merge cannot fan out into every row
    # the athlete has in the full frame.
    profile = df[["Name", "Sport"]].drop_duplicates("Name")
    return top.merge(profile, on="Name", how="left")
174
+
175
+
176
def weight_v_height(df, sport):
    """Return one row per (Name, region), optionally limited to one sport.

    Args:
        df: DataFrame with "Name", "region", "Medal" and "Sport" columns.
        sport: A value of the "Sport" column, or "Overall" for no filter.

    Returns:
        DataFrame holding the first row of each (Name, region) pair, with
        missing "Medal" values replaced by the string "No Medal". ``df``
        itself is not modified.
    """
    # Fill on the chained result rather than in place: an in-place fillna on
    # a frame derived from ``df`` can trigger pandas' chained-assignment
    # warning.
    athlete_df = df.drop_duplicates(subset=["Name", "region"]).fillna(
        {"Medal": "No Medal"}
    )

    if sport == "Overall":
        return athlete_df
    return athlete_df[athlete_df["Sport"] == sport]
187
+
188
+
189
def men_vs_women(df):
    """Count male and female athletes per year.

    Each (Name, region) pair is counted once, in the year of its first row
    in ``df``.

    Args:
        df: DataFrame with "Name", "region", "Sex" ("M"/"F") and "Year"
            columns.

    Returns:
        DataFrame with columns Year, Male, Female (integer counts), sorted by
        year. A year with athletes of only one sex gets 0 for the other.
    """
    athlete_df = df.drop_duplicates(subset=["Name", "region"])

    def _count_by_year(sex, label):
        # Number of athlete rows of the given sex per year, as Year/<label>.
        return (
            athlete_df[athlete_df["Sex"] == sex]
            .groupby("Year")["Name"]
            .count()
            .reset_index(name=label)
        )

    men = _count_by_year("M", "Male")
    women = _count_by_year("F", "Female")

    # An outer join keeps years that appear for only one sex; a left join
    # from ``men`` would silently drop years that have only female rows.
    final = men.merge(women, on="Year", how="outer").fillna(0)

    # The fill above leaves float columns wherever a year was missing on one
    # side; counts are whole numbers, so cast them back to int.
    final = final.astype({"Male": "int", "Female": "int"})

    return final.sort_values("Year").reset_index(drop=True)