zfec/setuptools-0.6c15dev.egg/setuptools/package_index.py

   1 """PyPI and direct package downloading"""
   2 import sys, os.path, re, urlparse, urllib2, shutil, random, socket, cStringIO
   3 import httplib, urllib
   4 from pkg_resources import *
   5 from distutils import log
   6 from distutils.errors import DistutilsError
   7 try:
   8     from hashlib import md5
   9 except ImportError:
  10     from md5 import md5
  11 from fnmatch import translate
# Matches a complete URL fragment of the form ``egg=<identifier>``; group 1
# is the identifier (letters, digits, ``-``, ``_`` and ``.`` only).
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
# Finds ``href=`` attributes (case-insensitively); group 1 is the attribute
# value, read up to the next quote, ``>`` or space.
HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
# this is here to fix emacs' cruddy broken syntax highlighting
# Matches a download anchor followed by a parenthesised "md5" link.  Groups:
# (1) the download href, (2) the link text, (3) the 32-hex-digit digest.
# process_index() uses it to rewrite such links as ``href#md5=digest``.
PYPI_MD5 = re.compile(
    '<a href="([^"#]+)">([^<]+)</a>\n\s+\\(<a (?:title="MD5 hash"\n\s+)'
    'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\\)'
)
# Bound ``match`` method: succeeds when a string starts with a URL scheme of
# at least two characters followed by ``:``; group 1 is the scheme.
# NOTE(review): the two-character minimum presumably keeps Windows drive
# letters such as ``C:`` from being treated as schemes -- confirm.
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match
# Archive suffixes that distros_for_location() treats as source distributions.
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()

# Names exported by ``from setuptools.package_index import *``.
__all__ = [
    'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',
    'interpret_distro_name',
]
  26
  27 def parse_bdist_wininst(name):
  28     """Return (base,pyversion) or (None,None) for possible .exe name"""
  29
  30     lower = name.lower()
  31     base, py_ver = None, None
  32
  33     if lower.endswith('.exe'):
  34         if lower.endswith('.win32.exe'):
  35             base = name[:-10]
  36         elif lower.startswith('.win32-py',-16):
  37             py_ver = name[-7:-4]
  38             base = name[:-16]
  39
  40     return base,py_ver
  41
  42 def egg_info_for_url(url):
  43     scheme, server, path, parameters, query, fragment = urlparse.urlparse(url)
  44     base = urllib2.unquote(path.split('/')[-1])
  45     if server=='sourceforge.net' and base=='download':    # XXX Yuck
  46         base = urllib2.unquote(path.split('/')[-2])
  47     if '#' in base: base, fragment = base.split('#',1)
  48     return base,fragment
  49
  50 def distros_for_url(url, metadata=None):
  51     """Yield egg or source distribution objects that might be found at a URL"""
  52     base, fragment = egg_info_for_url(url)
  53     for dist in distros_for_location(url, base, metadata): yield dist
  54     if fragment:
  55         match = EGG_FRAGMENT.match(fragment)
  56         if match:
  57             for dist in interpret_distro_name(
  58                 url, match.group(1), metadata, precedence = CHECKOUT_DIST
  59             ):
  60                 yield dist
  61
  62 def distros_for_location(location, basename, metadata=None):
  63     """Yield egg or source distribution objects based on basename"""
  64     if basename.endswith('.egg.zip'):
  65         basename = basename[:-4]    # strip the .zip
  66     if basename.endswith('.egg') and '-' in basename:
  67         # only one, unambiguous interpretation
  68         return [Distribution.from_location(location, basename, metadata)]
  69     if basename.endswith('.exe'):
  70         win_base, py_ver = parse_bdist_wininst(basename)
  71         if win_base is not None:
  72             return interpret_distro_name(
  73                 location, win_base, metadata, py_ver, BINARY_DIST, "win32"
  74             )
  75     # Try source distro extensions (.zip, .tgz, etc.)
  76     #
  77     for ext in EXTENSIONS:
  78         if basename.endswith(ext):
  79             basename = basename[:-len(ext)]
  80             return interpret_distro_name(location, basename, metadata)
  81     return []  # no extension matched
  82
  83 def distros_for_filename(filename, metadata=None):
  84     """Yield possible egg or source distribution objects based on a filename"""
  85     return distros_for_location(
  86         normalize_path(filename), os.path.basename(filename), metadata
  87     )
  88
  89
  90 def interpret_distro_name(location, basename, metadata,
  91     py_version=None, precedence=SOURCE_DIST, platform=None
  92 ):
  93     """Generate alternative interpretations of a source distro name
  94
  95     Note: if `location` is a filesystem filename, you should call
  96     ``pkg_resources.normalize_path()`` on it before passing it to this
  97     routine!
  98     """
  99     # Generate alternative interpretations of a source distro name
 100     # Because some packages are ambiguous as to name/versions split
 101     # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
 102     # So, we generate each possible interepretation (e.g. "adns, python-1.1.0"
 103     # "adns-python, 1.1.0", and "adns-python-1.1.0, no version").  In practice,
 104     # the spurious interpretations should be ignored, because in the event
 105     # there's also an "adns" package, the spurious "python-1.1.0" version will
 106     # compare lower than any numeric version number, and is therefore unlikely
 107     # to match a request for it.  It's still a potential problem, though, and
 108     # in the long run PyPI and the distutils should go for "safe" names and
 109     # versions in distribution archive names (sdist and bdist).
 110
 111     parts = basename.split('-')
 112     if not py_version:
 113         for i,p in enumerate(parts[2:]):
 114             if len(p)==5 and p.startswith('py2.'):
 115                 return # It's a bdist_dumb, not an sdist -- bail out
 116
 117     for p in range(1,len(parts)+1):
 118         yield Distribution(
 119             location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
 120             py_version=py_version, precedence = precedence,
 121             platform = platform
 122         )
 123
# Matches any tag that has a ``rel=`` attribute (case-insensitively).
# Group 1 is everything between the angle brackets and group 2 is the
# (optionally quoted) ``rel`` value.
REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
# this line is here to fix emacs' cruddy broken syntax highlighting
 126
 127 def find_external_links(url, page):
 128     """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""
 129
 130     for match in REL.finditer(page):
 131         tag, rel = match.groups()
 132         rels = map(str.strip, rel.lower().split(','))
 133         if 'homepage' in rels or 'download' in rels:
 134             for match in HREF.finditer(tag):
 135                 yield urlparse.urljoin(url, htmldecode(match.group(1)))
 136
 137     for tag in ("<th>Home Page", "<th>Download URL"):
 138         pos = page.find(tag)
 139         if pos!=-1:
 140             match = HREF.search(page,pos)
 141             if match:
 142                 yield urlparse.urljoin(url, htmldecode(match.group(1)))
 143
# ``User-Agent`` value that open_with_auth() attaches to every request.  It is
# computed once at import time from urllib2's version and the version of the
# first distribution returned by ``require('setuptools')``.
user_agent = "Python-urllib/%s setuptools/%s" % (
    urllib2.__version__, require('setuptools')[0].version
)
 147
 148
 149 class PackageIndex(Environment):
 150     """A distribution index that scans web pages for download URLs"""
 151
 152     def __init__(self, index_url="http://pypi.python.org/simple", hosts=('*',),
 153         *args, **kw
 154     ):
 155         Environment.__init__(self,*args,**kw)
 156         self.index_url = index_url + "/"[:not index_url.endswith('/')]
 157         self.scanned_urls = {}
 158         self.fetched_urls = {}
 159         self.package_pages = {}
 160         self.allows = re.compile('|'.join(map(translate,hosts))).match
 161         self.to_scan = []
 162
 163
 164
 165     def process_url(self, url, retrieve=False):
 166         """Evaluate a URL as a possible download, and maybe retrieve it"""
 167         if url in self.scanned_urls and not retrieve:
 168             return
 169         self.scanned_urls[url] = True
 170         if not URL_SCHEME(url):
 171             self.process_filename(url)
 172             return
 173         else:
 174             dists = list(distros_for_url(url))
 175             if dists:
 176                 if not self.url_ok(url):
 177                     return
 178                 self.debug("Found link: %s", url)
 179
 180         if dists or not retrieve or url in self.fetched_urls:
 181             map(self.add, dists)
 182             return  # don't need the actual page
 183
 184         if not self.url_ok(url):
 185             self.fetched_urls[url] = True
 186             return
 187
 188         self.info("Reading %s", url)
 189         self.fetched_urls[url] = True   # prevent multiple fetch attempts
 190         f = self.open_url(url, "Download error: %s -- Some packages may not be found!")
 191         if f is None: return
 192         self.fetched_urls[f.url] = True
 193         if 'html' not in f.headers.get('content-type', '').lower():
 194             f.close()   # not html, we can't process it
 195             return
 196
 197         base = f.url     # handle redirects
 198         page = f.read()
 199         f.close()
 200         if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
 201             page = self.process_index(url, page)
 202         for match in HREF.finditer(page):
 203             link = urlparse.urljoin(base, htmldecode(match.group(1)))
 204             self.process_url(link)
 205
 206     def process_filename(self, fn, nested=False):
 207         # process filenames or directories
 208         if not os.path.exists(fn):
 209             self.warn("Not found: %s", fn)
 210             return
 211
 212         if os.path.isdir(fn) and not nested:
 213             path = os.path.realpath(fn)
 214             for item in os.listdir(path):
 215                 self.process_filename(os.path.join(path,item), True)
 216
 217         dists = distros_for_filename(fn)
 218         if dists:
 219             self.debug("Found: %s", fn)
 220             map(self.add, dists)
 221
 222     def url_ok(self, url, fatal=False):
 223         s = URL_SCHEME(url)
 224         if (s and s.group(1).lower()=='file') or self.allows(urlparse.urlparse(url)[1]):
 225             return True
 226         msg = "\nLink to % s ***BLOCKED*** by --allow-hosts\n"
 227         if fatal:
 228             raise DistutilsError(msg % url)
 229         else:
 230             self.warn(msg, url)
 231
 232     def scan_egg_links(self, search_path):
 233         for item in search_path:
 234             if os.path.isdir(item):
 235                 for entry in os.listdir(item):
 236                     if entry.endswith('.egg-link'):
 237                         self.scan_egg_link(item, entry)
 238
 239     def scan_egg_link(self, path, entry):
 240         lines = filter(None, map(str.strip, file(os.path.join(path, entry))))
 241         if len(lines)==2:
 242             for dist in find_distributions(os.path.join(path, lines[0])):
 243                 dist.location = os.path.join(path, *lines)
 244                 dist.precedence = SOURCE_DIST
 245                 self.add(dist)
 246
 247     def process_index(self,url,page):
 248         """Process the contents of a PyPI page"""
 249         def scan(link):
 250             # Process a URL to see if it's for a package page
 251             if link.startswith(self.index_url):
 252                 parts = map(
 253                     urllib2.unquote, link[len(self.index_url):].split('/')
 254                 )
 255                 if len(parts)==2 and '#' not in parts[1]:
 256                     # it's a package page, sanitize and index it
 257                     pkg = safe_name(parts[0])
 258                     ver = safe_version(parts[1])
 259                     self.package_pages.setdefault(pkg.lower(),{})[link] = True
 260                     return to_filename(pkg), to_filename(ver)
 261             return None, None
 262
 263         # process an index page into the package-page index
 264         for match in HREF.finditer(page):
 265             scan( urlparse.urljoin(url, htmldecode(match.group(1))) )
 266
 267         pkg, ver = scan(url)   # ensure this page is in the page index
 268         if pkg:
 269             # process individual package page
 270             for new_url in find_external_links(url, page):
 271                 # Process the found URL
 272                 base, frag = egg_info_for_url(new_url)
 273                 if base.endswith('.py') and not frag:
 274                     if ver:
 275                         new_url+='#egg=%s-%s' % (pkg,ver)
 276                     else:
 277                         self.need_version_info(url)
 278                 self.scan_url(new_url)
 279
 280             return PYPI_MD5.sub(
 281                 lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1,3,2), page
 282             )
 283         else:
 284             return ""   # no sense double-scanning non-package pages
 285
 286
 287
 288     def need_version_info(self, url):
 289         self.scan_all(
 290             "Page at %s links to .py file(s) without version info; an index "
 291             "scan is required.", url
 292         )
 293
 294     def scan_all(self, msg=None, *args):
 295         if self.index_url not in self.fetched_urls:
 296             if msg: self.warn(msg,*args)
 297             self.info(
 298                 "Scanning index of all packages (this may take a while)"
 299             )
 300         self.scan_url(self.index_url)
 301
 302     def find_packages(self, requirement):
 303         self.scan_url(self.index_url + requirement.unsafe_name+'/')
 304
 305         if not self.package_pages.get(requirement.key):
 306             # Fall back to safe version of the name
 307             self.scan_url(self.index_url + requirement.project_name+'/')
 308
 309         if not self.package_pages.get(requirement.key):
 310             # We couldn't find the target package, so search the index page too
 311             self.not_found_in_index(requirement)
 312
 313         for url in list(self.package_pages.get(requirement.key,())):
 314             # scan each page that might be related to the desired package
 315             self.scan_url(url)
 316
 317     def obtain(self, requirement, installer=None):
 318         self.prescan(); self.find_packages(requirement)
 319         for dist in self[requirement.key]:
 320             if dist in requirement:
 321                 return dist
 322             self.debug("%s does not match %s", requirement, dist)
 323         return super(PackageIndex, self).obtain(requirement,installer)
 324
 325
 326
 327
 328
 329     def check_md5(self, cs, info, filename, tfp):
 330         if re.match('md5=[0-9a-f]{32}$', info):
 331             self.debug("Validating md5 checksum for %s", filename)
 332             if cs.hexdigest()!=info[4:]:
 333                 tfp.close()
 334                 os.unlink(filename)
 335                 raise DistutilsError(
 336                     "MD5 validation failed for "+os.path.basename(filename)+
 337                     "; possible download problem?"
 338                 )
 339
 340     def add_find_links(self, urls):
 341         """Add `urls` to the list that will be prescanned for searches"""
 342         for url in urls:
 343             if (
 344                 self.to_scan is None        # if we have already "gone online"
 345                 or not URL_SCHEME(url)      # or it's a local file/directory
 346                 or url.startswith('file:')
 347                 or list(distros_for_url(url))   # or a direct package link
 348             ):
 349                 # then go ahead and process it now
 350                 self.scan_url(url)
 351             else:
 352                 # otherwise, defer retrieval till later
 353                 self.to_scan.append(url)
 354
 355     def prescan(self):
 356         """Scan urls scheduled for prescanning (e.g. --find-links)"""
 357         if self.to_scan:
 358             map(self.scan_url, self.to_scan)
 359         self.to_scan = None     # from now on, go ahead and process immediately
 360
 361     def not_found_in_index(self, requirement):
 362         if self[requirement.key]:   # we've seen at least one distro
 363             meth, msg = self.info, "Couldn't retrieve index page for %r"
 364         else:   # no distros seen for this name, might be misspelled
 365             meth, msg = (self.warn,
 366                 "Couldn't find index page for %r (maybe misspelled?)")
 367         meth(msg, requirement.unsafe_name)
 368         self.scan_all()
 369
 370     def download(self, spec, tmpdir):
 371         """Locate and/or download `spec` to `tmpdir`, returning a local path
 372
 373         `spec` may be a ``Requirement`` object, or a string containing a URL,
 374         an existing local filename, or a project/version requirement spec
 375         (i.e. the string form of a ``Requirement`` object).  If it is the URL
 376         of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
 377         that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
 378         automatically created alongside the downloaded file.
 379
 380         If `spec` is a ``Requirement`` object or a string containing a
 381         project/version requirement spec, this method returns the location of
 382         a matching distribution (possibly after downloading it to `tmpdir`).
 383         If `spec` is a locally existing file or directory name, it is simply
 384         returned unchanged.  If `spec` is a URL, it is downloaded to a subpath
 385         of `tmpdir`, and the local filename is returned.  Various errors may be
 386         raised if a problem occurs during downloading.
 387         """
 388         if not isinstance(spec,Requirement):
 389             scheme = URL_SCHEME(spec)
 390             if scheme:
 391                 # It's a url, download it to tmpdir
 392                 found = self._download_url(scheme.group(1), spec, tmpdir)
 393                 base, fragment = egg_info_for_url(spec)
 394                 if base.endswith('.py'):
 395                     found = self.gen_setup(found,fragment,tmpdir)
 396                 return found
 397             elif os.path.exists(spec):
 398                 # Existing file or directory, just return it
 399                 return spec
 400             else:
 401                 try:
 402                     spec = Requirement.parse(spec)
 403                 except ValueError:
 404                     raise DistutilsError(
 405                         "Not a URL, existing file, or requirement spec: %r" %
 406                         (spec,)
 407                     )
 408         return getattr(self.fetch_distribution(spec, tmpdir),'location',None)
 409
 410
 411     def fetch_distribution(self,
 412         requirement, tmpdir, force_scan=False, source=False, develop_ok=False,
 413         local_index=None,
 414     ):
 415         """Obtain a distribution suitable for fulfilling `requirement`
 416
 417         `requirement` must be a ``pkg_resources.Requirement`` instance.
 418         If necessary, or if the `force_scan` flag is set, the requirement is
 419         searched for in the (online) package index as well as the locally
 420         installed packages.  If a distribution matching `requirement` is found,
 421         the returned distribution's ``location`` is the value you would have
 422         gotten from calling the ``download()`` method with the matching
 423         distribution's URL or filename.  If no matching distribution is found,
 424         ``None`` is returned.
 425
 426         If the `source` flag is set, only source distributions and source
 427         checkout links will be considered.  Unless the `develop_ok` flag is
 428         set, development and system eggs (i.e., those using the ``.egg-info``
 429         format) will be ignored.
 430         """
 431         # process a Requirement
 432         self.info("Searching for %s", requirement)
 433         skipped = {}
 434         dist = None
 435
 436         def find(env, req):
 437             # Find a matching distribution; may be called more than once
 438
 439             for dist in env[req.key]:
 440
 441                 if dist.precedence==DEVELOP_DIST and not develop_ok:
 442                     if dist not in skipped:
 443                         self.warn("Skipping development or system egg: %s",dist)
 444                         skipped[dist] = 1
 445                     continue
 446
 447                 if dist in req and (dist.precedence<=SOURCE_DIST or not source):
 448                     return dist
 449
 450
 451
 452         if force_scan:
 453             self.prescan()
 454             self.find_packages(requirement)
 455             dist = find(self, requirement)
 456
 457         if local_index is not None:
 458             dist = dist or find(local_index, requirement)
 459
 460         if dist is None and self.to_scan is not None:
 461             self.prescan()
 462             dist = find(self, requirement)
 463
 464         if dist is None and not force_scan:
 465             self.find_packages(requirement)
 466             dist = find(self, requirement)
 467
 468         if dist is None:
 469             self.warn(
 470                 "No local packages or download links found for %s%s",
 471                 (source and "a source distribution of " or ""),
 472                 requirement,
 473             )
 474         else:
 475             self.info("Best match: %s", dist)
 476             return dist.clone(location=self.download(dist.location, tmpdir))
 477
 478
 479     def fetch(self, requirement, tmpdir, force_scan=False, source=False):
 480         """Obtain a file suitable for fulfilling `requirement`
 481
 482         DEPRECATED; use the ``fetch_distribution()`` method now instead.  For
 483         backward compatibility, this routine is identical but returns the
 484         ``location`` of the downloaded distribution instead of a distribution
 485         object.
 486         """
 487         dist = self.fetch_distribution(requirement,tmpdir,force_scan,source)
 488         if dist is not None:
 489             return dist.location
 490         return None
 491
 492
 493     def gen_setup(self, filename, fragment, tmpdir):
 494         match = EGG_FRAGMENT.match(fragment)
 495         dists = match and [d for d in
 496             interpret_distro_name(filename, match.group(1), None) if d.version
 497         ] or []
 498
 499         if len(dists)==1:   # unambiguous ``#egg`` fragment
 500             basename = os.path.basename(filename)
 501
 502             # Make sure the file has been downloaded to the temp dir.
 503             if os.path.dirname(filename) != tmpdir:
 504                 dst = os.path.join(tmpdir, basename)
 505                 from setuptools.command.easy_install import samefile
 506                 if not samefile(filename, dst):
 507                     shutil.copy2(filename, dst)
 508                     filename=dst
 509
 510             file = open(os.path.join(tmpdir, 'setup.py'), 'w')
 511             file.write(
 512                 "from setuptools import setup\n"
 513                 "setup(name=%r, version=%r, py_modules=[%r])\n"
 514                 % (
 515                     dists[0].project_name, dists[0].version,
 516                     os.path.splitext(basename)[0]
 517                 )
 518             )
 519             file.close()
 520             return filename
 521
 522         elif match:
 523             raise DistutilsError(
 524                 "Can't unambiguously interpret project/version identifier %r; "
 525                 "any dashes in the name or version should be escaped using "
 526                 "underscores. %r" % (fragment,dists)
 527             )
 528         else:
 529             raise DistutilsError(
 530                 "Can't process plain .py files without an '#egg=name-version'"
 531                 " suffix to enable automatic setup script generation."
 532             )
 533
 534     dl_blocksize = 8192
 535     def _download_to(self, url, filename):
 536         self.info("Downloading %s", url)
 537         # Download the file
 538         fp, tfp, info = None, None, None
 539         try:
 540             if '#' in url:
 541                 url, info = url.split('#', 1)
 542             fp = self.open_url(url)
 543             if isinstance(fp, urllib2.HTTPError):
 544                 raise DistutilsError(
 545                     "Can't download %s: %s %s" % (url, fp.code,fp.msg)
 546                 )
 547             cs = md5()
 548             headers = fp.info()
 549             blocknum = 0
 550             bs = self.dl_blocksize
 551             size = -1
 552             if "content-length" in headers:
 553                 size = int(headers["Content-Length"])
 554                 self.reporthook(url, filename, blocknum, bs, size)
 555             tfp = open(filename,'wb')
 556             while True:
 557                 block = fp.read(bs)
 558                 if block:
 559                     cs.update(block)
 560                     tfp.write(block)
 561                     blocknum += 1
 562                     self.reporthook(url, filename, blocknum, bs, size)
 563                 else:
 564                     break
 565             if info: self.check_md5(cs, info, filename, tfp)
 566             return headers
 567         finally:
 568             if fp: fp.close()
 569             if tfp: tfp.close()
 570
 571     def reporthook(self, url, filename, blocknum, blksize, size):
 572         pass    # no-op
 573
 574
 575     def open_url(self, url, warning=None):
 576         if url.startswith('file:'): return local_open(url)
 577         try:
 578             return open_with_auth(url)
 579         except urllib2.HTTPError, v:
 580             return v
 581         except urllib2.URLError, v:
 582             reason = v.reason
 583         except httplib.HTTPException, v:
 584             reason = "%s: %s" % (v.__doc__ or v.__class__.__name__, v)
 585         if warning:
 586             self.warn(warning, reason)
 587         else:
 588             raise DistutilsError("Download error for %s: %s" % (url, reason))
 589
 590     def _download_url(self, scheme, url, tmpdir):
 591         # Determine download filename
 592         #
 593         name, fragment = egg_info_for_url(url)
 594         if name:
 595             while '..' in name:
 596                 name = name.replace('..','.').replace('\\','_')
 597         else:
 598             name = "__downloaded__"    # default if URL has no path contents
 599
 600         if name.endswith('.egg.zip'):
 601             name = name[:-4]    # strip the extra .zip before download
 602
 603         filename = os.path.join(tmpdir,name)
 604
 605         # Download the file
 606         #
 607         if scheme=='svn' or scheme.startswith('svn+'):
 608             return self._download_svn(url, filename)
 609         elif scheme=='file':
 610             return urllib2.url2pathname(urlparse.urlparse(url)[2])
 611         else:
 612             self.url_ok(url, True)   # raises error if not allowed
 613             return self._attempt_download(url, filename)
 614
 615
 616     def scan_url(self, url):
 617         self.process_url(url, True)
 618
 619
 620     def _attempt_download(self, url, filename):
 621         headers = self._download_to(url, filename)
 622         if 'html' in headers.get('content-type','').lower():
 623             return self._download_html(url, headers, filename)
 624         else:
 625             return filename
 626
 627     def _download_html(self, url, headers, filename):
 628         file = open(filename)
 629         for line in file:
 630             if line.strip():
 631                 # Check for a subversion index page
 632                 if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
 633                     # it's a subversion index page:
 634                     file.close()
 635                     os.unlink(filename)
 636                     return self._download_svn(url, filename)
 637                 break   # not an index page
 638         file.close()
 639         os.unlink(filename)
 640         raise DistutilsError("Unexpected HTML page found at "+url)
 641
 642     def _download_svn(self, url, filename):
 643         url = url.split('#',1)[0]   # remove any fragment for svn's sake
 644         self.info("Doing subversion checkout from %s to %s", url, filename)
 645         os.system("svn checkout -q %s %s" % (url, filename))
 646         return filename
 647
 648     def debug(self, msg, *args):
 649         log.debug(msg, *args)
 650
 651     def info(self, msg, *args):
 652         log.info(msg, *args)
 653
 654     def warn(self, msg, *args):
 655         log.warn(msg, *args)
 656
# This pattern matches a character entity reference (a decimal numeric
# reference, a hexadecimal numeric reference, or a named reference); the
# trailing ";" is optional.  ``entity_sub`` is the pattern's bound ``sub``
# method, and group 1 is the reference text without the leading "&".
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
 660
 661 def uchr(c):
 662     if not isinstance(c, int):
 663         return c
 664     if c>255: return unichr(c)
 665     return chr(c)
 666
 667 def decode_entity(match):
 668     what = match.group(1)
 669     if what.startswith('#x'):
 670         what = int(what[2:], 16)
 671     elif what.startswith('#'):
 672         what = int(what[1:])
 673     else:
 674         from htmlentitydefs import name2codepoint
 675         what = name2codepoint.get(what, match.group(0))
 676     return uchr(what)
 677
 678 def htmldecode(text):
 679     """Decode HTML entities in the given text."""
 680     return entity_sub(decode_entity, text)
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698 def open_with_auth(url):
 699     """Open a urllib2 request, handling HTTP authentication"""
 700
 701     scheme, netloc, path, params, query, frag = urlparse.urlparse(url)
 702
 703     if scheme in ('http', 'https'):
 704         auth, host = urllib.splituser(netloc)
 705     else:
 706         auth = None
 707
 708     if auth:
 709         auth = "Basic " + urllib2.unquote(auth).encode('base64').strip()
 710         new_url = urlparse.urlunparse((scheme,host,path,params,query,frag))
 711         request = urllib2.Request(new_url)
 712         request.add_header("Authorization", auth)
 713     else:
 714         request = urllib2.Request(url)
 715
 716     request.add_header('User-Agent', user_agent)
 717     fp = urllib2.urlopen(request)
 718
 719     if auth:
 720         # Put authentication info back into request URL if same host,
 721         # so that links found on the page will work
 722         s2, h2, path2, param2, query2, frag2 = urlparse.urlparse(fp.url)
 723         if s2==scheme and h2==host:
 724             fp.url = urlparse.urlunparse((s2,netloc,path2,param2,query2,frag2))
 725
 726     return fp
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
def fix_sf_url(url):
    """Return `url` unchanged; retained only for backward compatibility"""
    return url      # backward compatibility
 741
 742 def local_open(url):
 743     """Read a local path, with special support for directories"""
 744     scheme, server, path, param, query, frag = urlparse.urlparse(url)
 745     filename = urllib2.url2pathname(path)
 746     if os.path.isfile(filename):
 747         return urllib2.urlopen(url)
 748     elif path.endswith('/') and os.path.isdir(filename):
 749         files = []
 750         for f in os.listdir(filename):
 751             if f=='index.html':
 752                 body = open(os.path.join(filename,f),'rb').read()
 753                 break
 754             elif os.path.isdir(os.path.join(filename,f)):
 755                 f+='/'
 756             files.append("<a href=%r>%s</a>" % (f,f))
 757         else:
 758             body = ("<html><head><title>%s</title>" % url) + \
 759                 "</head><body>%s</body></html>" % '\n'.join(files)
 760         status, message = 200, "OK"
 761     else:
 762         status, message, body = 404, "Path not found", "Not found"
 763
 764     return urllib2.HTTPError(url, status, message,
 765             {'content-type':'text/html'}, cStringIO.StringIO(body))
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779 # this line is a kludge to keep the trailing blank lines for pje's editor