"""
Scrape ESPN MLB power rankings and season standings, then measure how
biased each week's rankings are relative to teams' win-loss records.
"""
from bs4 import BeautifulSoup
import socket
import urllib.error
import urllib.request

from urllib import parse as urlparse  # @UnusedImport

directory = '/Users/cuthbert/Desktop/PowerRankingsMLB/'
urlBase = 'http://espn.go.com/mlb/powerrankings/_/year/'


def getPowerRankingsForYear(year):
    '''Download every week's power-ranking page for one year to local HTML files.'''
    urlBaseYear = urlBase + str(year) + '/week/'
    minWeek = 1
    if year == 2005:
        minWeek = 16
    maxWeek = 26
    if year == 2014:
        maxWeek = 10
    for wk in range(minWeek, maxWeek + 1):
        urlBaseWeek = urlBaseYear + str(wk)
        outFP = directory + str(year) + '-' + str(wk) + '.html'
        html = getHTMLfromURL(urlBaseWeek)
        if html is None:  # skip weeks that failed to download
            continue
        with open(outFP, 'w') as f:
            f.write(html)
        print(urlBaseWeek)
        # print(outFP)


def getHTMLfromURL(url, referer=None):
    '''Fetch a URL and return its HTML as text, or None on error or timeout.'''
    opener = urllib.request.build_opener()
    userAgent = ('User-agent',
                 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) '
                 'AppleWebKit/536.30.1 (KHTML, like Gecko) '
                 'Version/6.0.5 Safari/536.30.1')
    if referer is None:
        opener.addheaders = [userAgent]
    else:
        opener.addheaders = [('Referer', referer), userAgent]
    try:
        fh = opener.open(url, None, 4)  # 4 seconds is enough... files should be pretty small
        allHTML = fh.read().decode('utf-8', errors='replace')
        fh.close()
        return allHTML
    except (urllib.error.URLError, socket.timeout):
        return None


def getHTMLfromFile(fp):
    with open(fp) as f:
        return f.read()


def getRanks(html):
    '''Return a list of (rank, teamName, wins, losses) tuples from a rankings page.'''
    soup = BeautifulSoup(html, 'html.parser')
    allRanks = [td for td in soup.find_all('td')
                if 'pr-rank' in (td.get('class') or [])]
    allTuples = []
    for rankTd in allRanks:
        rank = int(rankTd.contents[0])
        # the sibling cell holds the team name and its "W-L" record
        team = rankTd.next_sibling.contents[1].contents[0].contents[0]
        record = rankTd.next_sibling.contents[1].contents[2].contents[0]
        wins, losses = record.split('-')
        allTuples.append((rank, team, int(wins), int(losses)))
    return allTuples


def calculateOneWeekBias(rankList):
    '''
    For each team, sum the rank distance to every other team whose ranking
    contradicts the win-loss comparison.  Positive bias means the team is
    ranked better than its record justifies; negative means ranked worse.
    '''
    bias = {}
    for teamTuple in rankList:
        teamBias = 0
        rank = teamTuple[0]
        name = teamTuple[1]
        winLoss = teamTuple[2] - teamTuple[3]
        for ttOthers in rankList:
            if teamTuple == ttOthers:
                continue  # not actually necessary since bias will be zero...
            orank = ttOthers[0]
            owinLoss = ttOthers[2] - ttOthers[3]
            thisBias = 0
            ## remember a "higher ranking team" has a lower "rank" score!
            if winLoss > owinLoss and rank > orank:
                thisBias = orank - rank  # better record, worse rank: negative
            elif winLoss < owinLoss and rank < orank:
                thisBias = orank - rank  # worse record, better rank: positive
            # print(teamTuple, winLoss, ttOthers, owinLoss, ": ", thisBias, rank, orank)
            teamBias += thisBias
        # print(name, teamBias)
        bias[name] = teamBias
    return bias


def runOneWeek(year, week):
    fp = directory + str(year) + '-' + str(week) + '.html'
    html = getHTMLfromFile(fp)
    ranks = getRanks(html)
    return calculateOneWeekBias(ranks)


def runOneYear(year):
    minWeek = 1
    if year == 2005:
        minWeek = 16
    maxWeek = 26
    if year == 2014:
        maxWeek = 10
    bias = {}
    for wk in range(minWeek, maxWeek + 1):
        weekBias = runOneWeek(year, wk)
        for x in weekBias:
            if x not in bias:
                bias[x] = weekBias[x]
            else:
                bias[x] += weekBias[x]
    return bias


def runFullDataset():
    minYear = 2005
    maxYear = 2014
    bias = {}
    for year in range(minYear, maxYear + 1):
        yearBias = runOneYear(year)
        for x in yearBias:
            if x not in bias:
                bias[x] = yearBias[x]
            else:
                bias[x] += yearBias[x]
    for x in sorted(bias):
        print(x, bias[x])
    return bias


def dlRecordsAll():
    for year in range(2005, 2015):
        dlRecordForYear(year)


def dlRecordForYear(year):
    outFP = directory + 'record-' + str(year) + '.html'
    url = 'http://espn.go.com/mlb/standings/_/year/' + str(year)
    # print(outFP)
    print(url)
    html = getHTMLfromURL(url)
    if html is None:  # skip years that failed to download
        return
    with open(outFP, 'w') as f:
        f.write(html)


def extractRecordsForYear(year):
    '''Return a list of (teamName, wins, losses) tuples from a saved standings page.'''
    fp = directory + 'record-' + str(year) + '.html'
    html = getHTMLfromFile(fp)
    soup = BeautifulSoup(html, 'html.parser')
    allStats = [tr for tr in soup.find_all('tr')
                if 'oddrow' in (tr.get('class') or [])
                or 'evenrow' in (tr.get('class') or [])]
    returnStats = []
    for statline in allStats:
        try:
            name = statline.contents[0].find_all('a')[0].contents[0]
        except (IndexError, AttributeError):  # Miami -- name change
            try:
                name = statline.contents[0].contents[0]
                name = name.split('-')[-1]  # in case Miami clinches HA!
            except (IndexError, AttributeError):
                continue
        wins = statline.contents[1].contents[0]
        losses = statline.contents[2].contents[0]
        recordTuple = (str(name), int(wins), int(losses))
        # print(recordTuple)
        returnStats.append(recordTuple)
    return returnStats


def extractAllRecords():
    allStats = {}
    for year in range(2005, 2015):
        yearStats = extractRecordsForYear(year)
        for teamResult in yearStats:
            if teamResult[0] not in allStats:
                allStats[teamResult[0]] = {'wins': 0, 'losses': 0}
            allStats[teamResult[0]]['wins'] += teamResult[1]
            allStats[teamResult[0]]['losses'] += teamResult[2]
    return allStats


if __name__ == '__main__':
    # allStats = extractAllRecords()
    # for team in sorted(allStats):
    #     allWins = allStats[team]['wins']
    #     allLosses = allStats[team]['losses']
    #     allResults = (team, allWins, allLosses, allWins - allLosses, allWins + allLosses)
    #     print(allResults)
    # print(extractRecordsForYear(2006))
    # dlRecordsAll()
    runFullDataset()
    # for i in range(2006, 2015):
    #     getPowerRankingsForYear(i)
    # html = getHTMLfromFile('/Users/cuthbert/Desktop/testPR.html')
    # rl = getRanks(html)
    # print(calculateOneWeekBias(rl))
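
# A minimal sanity check for calculateOneWeekBias, using made-up teams and
# records (not real ESPN data): the team ranked ahead of a rival with a better
# record accrues positive (favorable) bias, and the slighted rival gets the
# mirror-image negative value.
#
#     >>> calculateOneWeekBias([(1, 'Team A', 10, 20), (2, 'Team B', 20, 10)])
#     {'Team A': 1, 'Team B': -1}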