This is officially what happens when a geek gets bored. A friend of mine was over tonight, and he showed me the 'say' utility on Mac OS X. Well, you can imagine where this went. Say no more:
Now just sit back and have a good chuckle.
#!/usr/bin/env python
from htmlentitydefs import name2codepoint as n2cp
import re
import urllib2
import os
import time
import string
class HTMLDecode(object):
    '''Replace HTML character references in a string with their characters.

    Based on http://snippets.dzone.com/posts/show/4569
    There is absolutely no reason for this to be in a class. I just put it
    in one.

    Three forms are recognised: decimal (``&#65;``), hexadecimal
    (``&#x41;``) and named (``&amp;``).  Anything that cannot be resolved
    is left in the text exactly as it was found.
    '''

    # The decoder keeps no per-instance state.  __slots__ must name instance
    # attributes only; listing the method names here (as before) makes class
    # creation fail on Python 3 and does nothing useful on Python 2.
    __slots__ = ()

    # group 1: '#' for numeric references, group 2: optional 'x' (hex
    # marker), group 3: the remaining word characters.
    _ENTITY_RE = re.compile(r'&(#?)(x?)(\w+);')

    def substitute_entity(self, match):
        '''Return the replacement text for one ``_ENTITY_RE`` match.

        match -- a match object produced by the entity pattern.

        Returns the decoded character, or the original matched text when
        the reference is malformed, out of range, or an unknown name.
        '''
        hash_mark, hex_mark, body = match.groups()
        if hash_mark == '#':
            base = 16 if hex_mark == 'x' else 10
            try:
                return unichr(int(body, base))
            except (ValueError, OverflowError):
                # Not a number in that base, or not a valid code point.
                return match.group()
        # For named entities the optional 'x' group may have swallowed the
        # first letter of the name (e.g. '&xi;'), so glue it back on.
        cp = n2cp.get(hex_mark + body)
        if cp is None:
            return match.group()
        return unichr(cp)

    def dcdhtmlent(self, string):
        '''Return `string` with every recognised HTML entity decoded.'''
        return self._ENTITY_RE.sub(self.substitute_entity, string)
class SpeakFarkHeadlines(object):
    '''This is what happens when you are bored and/or easily amused.

    Constructing an instance downloads http://www.fark.com and collects the
    text of every ``<span class="headline">`` element into `headlines`;
    `speak` then prints each headline and reads it aloud with the ``say``
    command.
    '''

    # Only instance attributes belong in __slots__; `speak` is a method.
    __slots__ = ('headlines',)

    _HEADLINE_RE = re.compile(r'<span\sclass="headline">(.*?)</span>')

    def __init__(self):
        '''Fetch the front page and populate `self.headlines`.'''
        decoder = HTMLDecode()
        response = urllib2.urlopen('http://www.fark.com')
        try:
            self.headlines = self._extract_headlines(response,
                                                     decoder.dcdhtmlent)
        finally:
            # Release the connection even if parsing fails.
            response.close()

    @classmethod
    def _extract_headlines(cls, lines, decode_entities):
        '''Return the headline texts found in an iterable of HTML lines.

        lines -- iterable of lines; byte strings are decoded as UTF-8
                 (undecodable bytes are replaced rather than raising).
        decode_entities -- callable applied to each line before matching.
        '''
        found = []
        for raw in lines:
            if isinstance(raw, bytes):
                # The response yields bytes; decode (not encode) them so
                # non-ASCII headlines do not raise UnicodeDecodeError.
                raw = raw.decode('utf-8', 'replace')
            # findall keeps every headline when a line carries several.
            found.extend(cls._HEADLINE_RE.findall(decode_entities(raw)))
        return found

    def speak(self):
        '''Print each headline, speak it, then pause five seconds.'''
        import subprocess
        for entry in self.headlines:
            print(entry)
            try:
                # An argument list bypasses the shell entirely, so no quote
                # stripping is needed and apostrophes are spoken correctly.
                subprocess.call(['say', entry])
            except OSError:
                # The `say` program could not be launched; keep printing
                # the remaining headlines instead of aborting.
                pass
            time.sleep(5)
if __name__ == '__main__':
    # Only scrape and speak when executed as a script, so that importing
    # this module has no network or audio side effects.
    app = SpeakFarkHeadlines()
    app.speak()
Now just sit back and have a good chuckle.