引用地址:https://www.kaggle.com/selfishgene/psychology-of-a-professional-athlete
# Plot the shot attempts as a function of time (from start of game) with
# several different binnings, one subplot per bin size.
plt.rcParams['figure.figsize'] = (16, 16)
plt.rcParams['font.size'] = 16

binsSizes = [24, 12, 6]  # histogram bin widths, in seconds

# x positions of the red vertical markers: 0, then every 12 minutes up to
# 48 minutes, then every 5 minutes for three further spans.
# NOTE(review): presumably the four regulation periods followed by up to
# three overtime periods -- confirm against the data set.
periodBoundaries = [
    0,
    12 * 60,
    2 * 12 * 60,
    3 * 12 * 60,
    4 * 12 * 60,
    4 * 12 * 60 + 5 * 60,
    4 * 12 * 60 + 2 * 5 * 60,
    4 * 12 * 60 + 3 * 5 * 60,
]

plt.figure()
for k, binSizeInSeconds in enumerate(binsSizes):
    # Bin edges span 4*12 + 3*5 minutes; every edge is shifted by 0.01 s.
    # NOTE(review): the shift presumably keeps values that sit exactly on a
    # whole-second boundary out of the following bin -- confirm.
    timeBins = np.arange(0, 60 * (4 * 12 + 3 * 5), binSizeInSeconds) + 0.01
    # `b` holds the edges returned by np.histogram; it is not used afterwards.
    attemptsAsFunctionOfTime, b = np.histogram(data['secondsFromGameStart'], bins=timeBins)

    # Leave 30 counts of headroom above the tallest bar.
    maxHeight = attemptsAsFunctionOfTime.max() + 30
    # Slightly narrower than the bin so adjacent bars do not touch.
    barWidth = 0.999 * (timeBins[1] - timeBins[0])

    plt.subplot(len(binsSizes), 1, k + 1)
    plt.bar(timeBins[:-1], attemptsAsFunctionOfTime, align='edge', width=barWidth)
    plt.title(str(binSizeInSeconds) + ' second time bins')
    plt.vlines(x=periodBoundaries, ymin=0, ymax=maxHeight, colors='r')
    plt.xlim((-20, 3200))
    plt.ylim((0, maxHeight))
    plt.ylabel('attempts')

# Issued after the loop, so the x label lands on the last (bottom) subplot only.
plt.xlabel('time [seconds from start of game]')
2、特征值的转换
def FactorizeCategoricalVariable(inputDB, categoricalVarName):
    """Expand one categorical column into 0/1 indicator columns.

    Parameters
    ----------
    inputDB : pandas.DataFrame
        Table holding the column to expand.
    categoricalVarName : str
        Name of the column in ``inputDB`` to expand.

    Returns
    -------
    pandas.DataFrame
        One integer column per value listed by ``value_counts()`` on the
        source column, in the order ``value_counts()`` lists them. Each
        column is named ``"<categoricalVarName>: <value>"`` and holds 1 on
        the rows where the source column equals that value, 0 elsewhere.
        Values that ``value_counts()`` does not list get no column, so such
        rows are 0 in every column.
    """
    column = inputDB[categoricalVarName]
    categories = column.value_counts().index.tolist()

    # Build every indicator first and assemble the frame in one call; adding
    # columns one at a time to an existing frame fragments it when there are
    # many categories.
    indicators = {
        categoricalVarName + ': ' + str(category): (column == category).astype(int)
        for category in categories
    }
    return pd.DataFrame(indicators)


featuresDB = pd.DataFrame()
# homeGame is 1 when the matchup string contains no '@', else 0.
# NOTE(review): presumably '@' marks away games -- confirm against the data.
featuresDB['homeGame'] = data['matchup'].apply(lambda x: 1 if '@' not in x else 0)
featuresDB = pd.concat([featuresDB, FactorizeCategoricalVariable(data, 'opponent')], axis=1)