1 import os, re, urlparse
2 from handler import Handler
3 from objectfs import ObjectFs
8 self.entries = {'gg': GoogleRoot()}
10 return self.entries.keys()
11 def join(self, hostname):
12 if hostname in self.entries:
13 return self.entries[hostname]
14 if '.' not in hostname:
16 result = HtmlNode('http://%s/' % (hostname,))
17 self.entries[hostname] = result
24 def __init__(self, url):
30 g = os.popen("lynx -source %r" % (self.url,), 'r')
36 class HtmlNode(UrlNode):
37 r_links = re.compile(r'<a\s[^>]*href="([^"]+)"[^>]*>(.*?)</a>',
38 re.IGNORECASE | re.DOTALL)
39 r_images = re.compile(r'<img\s[^>]*src="([^"]+[.]jpg)"', re.IGNORECASE)
41 def format(self, text, index,
42 TRANSTBL = ''.join([(32<=c<127 and c!=ord('/'))
44 for c in range(256)])):
45 return text.translate(TRANSTBL)
52 name = self.format(name, len(seen))
53 if name == '' or name.startswith('.'):
59 name = '%s_%d' % (basename, i)
63 for link, text in self.r_links.findall(data):
64 url = urlparse.urljoin(self.url, link)
65 yield uniquename(text), HtmlNode(url)
67 for link in self.r_images.findall(data):
68 text = os.path.basename(link)
69 url = urlparse.urljoin(self.url, link)
70 yield uniquename(text), RawNode(url)
72 yield '.source', RawNode(self.url)
75 class RawNode(UrlNode):
def join(self, query):
    """Look up the child entry named *query*.

    Any name is accepted: it becomes the search string of a freshly
    created ``GoogleSearch`` node, which is returned to the caller.
    """
    search_node = GoogleSearch(query)
    return search_node
class GoogleSearch(HtmlNode):
    """HTML node whose URL is a Google search for a given query string.

    Overrides the link pattern and the entry-name formatting of
    ``HtmlNode``; everything else is inherited.
    """

    # Only anchors carrying ``class=l`` are matched (presumably the marker
    # Google puts on result links -- confirm against the current markup).
    # Group 1 is the href, group 2 the anchor text.
    r_links = re.compile(r'<a\sclass=l\s[^>]*href="([^"]+)"[^>]*>(.*?)</a>',
                         re.IGNORECASE | re.DOTALL)

    def __init__(self, query):
        """Build the search URL for *query*.

        The query is URL-encoded so that spaces and reserved characters
        such as ``&``, ``#`` or ``+`` stay part of the ``q`` parameter
        instead of truncating or corrupting the request.
        """
        # Imported locally so this block does not depend on a new
        # module-level import; falls back to the Python 3 location.
        try:
            from urllib import quote_plus
        except ImportError:
            from urllib.parse import quote_plus
        self.url = 'http://www.google.com/search?q=' + quote_plus(query)

    def format(self, text, index):
        """Return the entry name for the link text *text* at position *index*.

        Literal ``<b>`` / ``</b>`` markers are removed, the remainder is
        passed through ``HtmlNode.format``, and the result is prefixed with
        ``"<index>. "``.
        """
        text = text.replace('<b>', '').replace('</b>', '')
        text = HtmlNode.format(self, text, index)
        return '%d. %s' % (index, text)
104 if __name__ == '__main__':
106 handler = Handler('/home/arigo/mnt', ObjectFs(root))
107 handler.loop_forever()