#!/usr/bin/env python
"""
This script uploads files specified in its command line to the GAVO
data center.

This script expects the "credentials" (i.e., the station id and the access key)
in a file "stationinfo" in the directory/ies of the input file(s).  You can
upload files from various subdirectories, the stationinfo will be read
anew for each file.

The stationinfo file has the format
<station id><any whitespace><access key>


There are two modes of calling this script:

Either give the files you want to upload on the command line (on Unix,
you can use wildcards), like this:

python uploadLM.py /path/to/data/*.txt

or have the script upload all files looking like data files (i.e.,
extensions txt, csv, or gz) from a directory:

python uploadLM.py -d /path/to/data.

In the second case, the script will not upload files containing the current
(UTC) date's YYYYMMDD -- this lets you apply it to "live" acquisition
directories when you name your files appropriately.  In that second
case, the script will move all uploaded files to a subdirectory uploaded
of /path/to/data.

The script will usually log to stdout.  To log to files, use the -l option
to pass the name of a directory to stuff the logs into.

"""

import os

# The root URL of the uploader.  Set the environment variable
# GAVO_UPLOADBASE to override the built-in default.
rootURL = os.environ.get("GAVO_UPLOADBASE",
  "http://dc.zah.uni-heidelberg.de/lightmeter/q/upload/custom")

# The HTTP proxy to send requests through as a (host, port) pair, or None
# to connect to the server directly.
PROXY = None
# If you are forced to use a proxy to access the web, enter it here,
# like this (keep the line below the default above so it overrides it):
#PROXY = "dwdmx5.dwd.de", 80

from cStringIO import StringIO
from email.Message import Message
from email.MIMEMultipart import MIMEMultipart
import datetime
import glob
import gzip
import httplib
import optparse
import os
import re
import sys
import urlparse


class UploadError(Exception):
  """is raised when a request to the upload service cannot be prepared
  (e.g., bad station info) or is not acknowledged by the server.
  """


def compress(data, fName):
  """returns data as a gzip-readable string.

  fName is not read here; it is only handed to GzipFile as the file name
  belonging to the compressed stream.
  """
  buf = StringIO()
  zipper = gzip.GzipFile(filename=fName, mode="w", compresslevel=5,
    fileobj=buf)
  zipper.write(data)
  zipper.close()
  return buf.getvalue()


def preprocessFile(fileName):
  """normalizes the content of fileName and returns it and a possibly
  mogrified name.

  If fileName ends with .gz, the file content is returned unchanged
  together with the unchanged name.  Otherwise, all CRs are removed, the
  result is compressed to something gzip can handle, and .gz is appended
  to the returned name.

  The file is read in binary mode; otherwise, platforms that translate
  line ends in text mode would corrupt input that is already compressed.

  This raises an IOError if fileName cannot be read.
  """
  f = open(fileName, "rb")
  try:
    content = f.read()
  finally:
    # do not leak the file handle when reading fails
    f.close()
  if not fileName.endswith(".gz"):
    content = compress(content.replace("\r", ""), fileName)
    fileName = fileName+".gz"
  return content, fileName


class FormData(MIMEMultipart):
  """is a container for multipart/form-data encoded messages.

  This is usually used for file uploads.

  After a call to addFile or addEmptyFile, the attributes uploadName and
  uploadSize contain the file name sent and the length of the payload
  of the file part added last.
  """
  def __init__(self):
    MIMEMultipart.__init__(self)
    self.epilogue = ""

  def _attachFilePart(self, paramName, fileName, content):
    """attaches content as an application/octet-stream part with the
    http parameter name paramName and the file name fileName.

    This also records fileName and the length of content in uploadName
    and uploadSize.
    """
    msg = Message()
    msg.set_type("application/octet-stream")
    msg["Content-Disposition"] = "form-data"
    msg.set_param("name", paramName, "Content-Disposition")
    msg.set_param("filename", fileName, "Content-Disposition")
    self.uploadName = fileName
    self.uploadSize = len(content)
    msg.set_payload(content)
    self.attach(msg)

  def addFile(self, paramName, fileName):
    """attaches the contents of fileName under the http parameter name
    paramName.

    The content goes through preprocessFile first, so what is attached
    (and the name it is attached under) may differ from what is on disk.
    """
    content, fileName = preprocessFile(fileName)
    self._attachFilePart(paramName, fileName, content)

  def addEmptyFile(self, paramName, fileName):
    """attaches a file part with an empty payload named fileName under
    the http parameter name paramName.

    No file is read for this.
    """
    self._attachFilePart(paramName, fileName, "")

  def addParam(self, paramName, paramVal):
    """adds a form parameter paramName with the (string) value paramVal
    """
    msg = Message()
    msg["Content-Disposition"] = "form-data"
    msg.set_param("name", paramName, "Content-Disposition")
    msg.set_payload(paramVal)
    self.attach(msg)


def _genForm(fName):
  """returns a FormData instance containing fName and the fixed form
  parameters for an upload to the lightmeter server.
  """
  uploadForm = FormData()
  uploadForm.addFile("inFile", fName)
  for name, value in [
      ("_charset_", "UTF-8"),
      ("__nevow_form__", "upload")]:
    uploadForm.addParam(name, value)
  return uploadForm


def _genRemovalForm(fName):
  """returns a FormData instance asking the lightmeter server to remove
  fName.

  Only the base name of fName is sent, as an empty file part; the
  additional parameter remove marks the request as a removal.
  """
  removalForm = FormData()
  removalForm.addEmptyFile("inFile", os.path.basename(fName))
  for name, value in [
      ("_charset_", "UTF-8"),
      ("__nevow_form__", "upload"),
      ("remove", "True")]:
    removalForm.addParam(name, value)
  return removalForm


def encodeMultipartFormdata(msg):
  """returns a safer version of as_string for msg.

  What is returned is a pair of the content type to declare (including
  the boundary) and the request body.  In the body, each part of msg is
  written as a boundary line, the part's headers, an empty line, and the
  part's payload, with all of these joined by CRLF.

  As a side effect, this sets a boundary on msg.

  This raises a NotImplementedError if the payload of a part is not a
  string (as is the case for nested multiparts).
  """
  msg.set_boundary("====================bnd%x"%id(msg))
  boundary = msg.get_boundary()
  res = []
  for part in msg.get_payload():
    payload = part.get_payload()
    if not isinstance(payload, basestring):
      # NotImplemented (without Error) is a constant, not an exception,
      # and cannot be called.
      raise NotImplementedError("Cannot encode recursive multiparts yet")
    res.append('--' + boundary)
    res.extend(['%s: %s'%hdr for hdr in part.items()])
    res.append('')
    res.append(payload)
  res.append('--' + boundary + '--')
  res.append('')
  contentType = 'multipart/form-data; boundary=%s' % boundary
  # what follows the closing boundary is the epilogue of the message
  return contentType, "\r\n".join(res)+"\r\nfoobar"


def _guessError(serverReply):
# guess the error message from the server.  I don't want to
# depend on BeautifulSoup, and I'm not ready to build a machine
# endpoint just yet, so I'm doing re-based screen scraping like it's 1999.
  mat = re.search('<div class="errors">(.*?)</div>', serverReply)
  if mat:
    mat = re.search("<li>(.*?)</li>", mat.group(1))
    if mat:
      return mat.group(1)
  return "Error in content or transfer"


def raiseIfUploadError(uploadSize, uploadName, serverResponse):
  """raises an UploadError unless serverResponse acknowledges the upload.

  An upload counts as acknowledged if the status is 200 and the response
  text contains a confirmation mentioning both uploadName and uploadSize.
  serverResponse needs the attributes status and data.
  """
  status = serverResponse.status
  if status!=200:
    raise UploadError("Bad status %s (404 in all likelihood means bad"
      " credentials)"%status)
  confirmation = "File %s uploaded, %d bytes"%(uploadName, uploadSize)
  if confirmation in serverResponse.data:
    return
  raise UploadError(_guessError(serverResponse.data))


def raiseIfRemovalError(fName, serverResponse):
  """raises an UploadError unless serverResponse acknowledges the removal
  of fName.

  The confirmation looked for in the response text mentions the base name
  of fName, with .gz appended if it does not end with .gz already.
  serverResponse needs the attributes status and data.
  """
  if serverResponse.status!=200:
    raise UploadError("Bad status %s"%serverResponse.status)
  remoteName = os.path.basename(fName)
  if not remoteName.endswith(".gz"):
    remoteName = remoteName+".gz"
  confirmation = "File %s removed."%remoteName
  if confirmation in serverResponse.data:
    return
  raise UploadError(_guessError(serverResponse.data))


def _getCredentials(stationInfo):
  try:
    f = open(stationInfo)
    content = f.read()
    f.close()
    content = re.sub("(?m)#.*$", "", content).strip()
  except IOError, msg:
    raise UploadError("Cannot obtain station info (%s)"%msg)
  try:
    id, phrase = content.split()
  except ValueError:
    raise UploadError("Invalid station info")
  return id, phrase


def _getUploadPath(basePath, stationInfo):
  """returns the server-side query path for uploading.

  The path is basePath followed by the station id and the access key
  as two further path segments.

  stationInfo is a path to a stationInfo file.  The request functions
  generate that path by inspecting the directory part of the uploaded
  file and looking for a file stationinfo within that directory.
  """
  stationId, accessKey = _getCredentials(stationInfo)
  return basePath+"/"+"/".join(["%s"%stationId, "%s"%accessKey])


if sys.hexversion>=0x2050000:
  parseURL = urlparse.urlparse
else:  # python <2.5 doesn't have hostname attribute
  class _ParseResult(tuple):
    """is a urlparse result tuple with hostname, port, and path
    attributes added.
    """
    def _getHostPort(self):
      # item 1 is the network location; drop user info up to the first @
      return self[1].split("@", 1)[-1]

    def _getHostname(self):
      host = self._getHostPort().split(":", 1)[0]
      return host.lower() or None

    def _getPort(self):
      pieces = self._getHostPort().split(":", 1)
      if len(pieces)==2:
        return int(pieces[1], 10)
      return None

    def _getPath(self):
      return self[2]

    hostname = property(_getHostname)
    port = property(_getPort)
    path = property(_getPath)

  def parseURL(url):
    """returns a _ParseResult for url.
    """
    return _ParseResult(urlparse.urlparse(url))


def _sendRequest(mime, payload, stationInfo):
  """sends a request posting payload to the server.

  mime is the content type to declare for payload, stationInfo the path
  of the stationinfo file the credentials are taken from.

  If PROXY is set, the connection goes to the proxy and the request is
  made for the full URL; otherwise, the connection goes to the host from
  rootURL and only the path is requested.

  The function returns the response object from the server with an added
  data attribute containing the response text.

  The connection is closed even if the request fails.
  """
  if PROXY:
    conn = httplib.HTTPConnection(PROXY[0], PROXY[1])
    uploadPath = _getUploadPath(rootURL, stationInfo)
  else:
    dest = parseURL(rootURL)
    conn = httplib.HTTPConnection(dest.hostname, dest.port)
    uploadPath = _getUploadPath(dest.path, stationInfo)
  try:
    conn.connect()
    conn.request("POST", uploadPath, payload, {
      "Content-Type": mime,
      })
    response = conn.getresponse()
    # the body must be read before the connection is closed
    response.data = response.read()
  finally:
    conn.close()
  return response


def _getStationInfoFor(fName):
  return os.path.join(os.path.dirname(fName), "stationinfo")


def upload(fName):
  """uploads fName to the server.

  The credentials are taken from the stationinfo file next to fName.

  This may raise all kinds of exceptions (UploadError and IOError are
  probably the most common ones).
  """
  form = _genForm(fName)
  stationInfo = _getStationInfoFor(fName)
  contentType, body = encodeMultipartFormdata(form)
  reply = _sendRequest(contentType, body, stationInfo)
  raiseIfUploadError(form.uploadSize, form.uploadName, reply)


def removeFromServer(fName):
  """asks the server to remove fName from its data holdings.

  The credentials are taken from the stationinfo file in the directory
  part of fName.  An UploadError is raised if the server does not
  confirm the removal.
  """
  stationInfo = _getStationInfoFor(fName)
  contentType, body = encodeMultipartFormdata(_genRemovalForm(fName))
  reply = _sendRequest(contentType, body, stationInfo)
  raiseIfRemovalError(fName, reply)



def getTodaysPattern():
  """returns the current UTC date as a string of the form YYYYMMDD.
  """
  utcNow = datetime.datetime.utcnow()
  # the first three items of a time tuple are year, month, and day
  return "%04d%02d%02d"%utcNow.timetuple()[:3]


def _getLogFile(opts):
  if opts.logDir:
    ensureDirectory(opts.logDir)
    return open(os.path.join(opts.logDir, getTodaysPattern()+".log"), "a")
  else:
    return sys.stdout


def getUploadableFilesFrom(srcDir):
  """returns a list of names of uploadable files from srcDir.

  Files in srcDir matching *.gz, *.csv, or *.txt are candidates (in that
  order); those with today's (UTC) YYYYMMDD anywhere in the name as
  returned by glob are left out.
  """
  candidates = []
  for extension in ("gz", "csv", "txt"):
    candidates += glob.glob(os.path.join(srcDir, "*."+extension))
  todaysPat = getTodaysPattern()
  uploadable = []
  for name in candidates:
    if todaysPat in name:
      continue
    uploadable.append(name)
  return uploadable


def ensureDirectory(dirName):
  """creates the directory dirName (and missing parents) unless it
  already exists.
  """
  if os.path.isdir(dirName):
    return
  os.makedirs(dirName)


def parseCommandLine():
  """returns an options object and the list of file names to upload.

  With -d, the file names are the uploadable files from the directory
  given, moving uploaded files is switched on, and the uploaded
  subdirectory of that directory is created if necessary.  Without it,
  the file names are the positional arguments.

  The script exits with a message if -d is combined with file names or
  with -R.
  """
  parser = optparse.OptionParser(usage="%prog [options] {file name}")
  parser.add_option("-d", "--upload-from", action="store",
    metavar="DIR", help="Upload all data files from DIR except those"
    " looking current (implies -m)", dest="dataDir", default=None)
  parser.add_option("-l", "--log-to", action="store",
    metavar="DIR", help="Write log information to DIR rather than stdout",
    dest="logDir", default=None)
  parser.add_option("-m", "--move-uploaded", action="store_true",
    dest="moveUploaded", help="Move files uploaded to a subdirectory"
    " uploaded?")
  parser.add_option("-R", "--remove", action="store_true",
    dest="removeArgs", help="Remove named files from the server")
  opts, args = parser.parse_args()
  if opts.dataDir:
    if args:
      sys.exit("No file names allowed with -d option")
    if opts.removeArgs:
      # the option is an upper-case -R; there is no -r
      sys.exit("Cannot use -R with -d")
    opts.moveUploaded = True
    ensureDirectory(os.path.join(opts.dataDir, "uploaded"))
    files = getUploadableFilesFrom(opts.dataDir)
  else:
    files = args
  return opts, files


def uploadFiles(opts, files, logFile):
  """uploads each of files, writing the outcome for each to logFile.

  If opts.moveUploaded is true, a file successfully uploaded is moved to
  the subdirectory uploaded of its directory; that subdirectory is
  created if necessary (the command line parser only does that for -d).

  A failure with one file is logged and does not keep the remaining
  files from being tried.  This includes failures to move the file and
  protocol errors from httplib.
  """
  for fName in files:
    logFile.write("Uploading %s... "%fName)
    logFile.flush()
    try:
      upload(fName)
      if opts.moveUploaded:
        destDir = os.path.join(os.path.dirname(fName), "uploaded")
        ensureDirectory(destDir)
        os.rename(fName, os.path.join(destDir, os.path.basename(fName)))
    except (UploadError, EnvironmentError, httplib.HTTPException):
      # EnvironmentError covers IOError as well as the OSError coming
      # from os.rename and os.makedirs.
      logFile.write("failed (%s).  Try manually.\n"%sys.exc_info()[1])
    else:
      logFile.write("ok.\n")


def removeFiles(opts, files, logFile):
  """asks the server to remove each of files, writing the outcome for
  each to logFile, followed by a note on what removal does not do.

  A failure with one file (including protocol errors from httplib) is
  logged and does not keep the remaining files from being tried.

  opts is not used here.
  """
  for fName in files:
    logFile.write("Removing %s... "%fName)
    logFile.flush()
    try:
      removeFromServer(fName)
    except (UploadError, EnvironmentError, httplib.HTTPException):
      logFile.write("failed (%s).\nAsk gavo@ari.uni-heidelberg.de\n"
        %sys.exc_info()[1])
    else:
      logFile.write("ok.\n")
  logFile.write("Please note that removing a source file does not remove"
    " any data\nalready ingested into the database.  Contact\n"
    "gavo@ari.uni-heidelberg.de if necessary.\n")



def main():
  """parses the command line and either uploads or (with -R) removes
  the files selected, logging to the file chosen by the options.
  """
  opts, files = parseCommandLine()
  logFile = _getLogFile(opts)
  action = uploadFiles
  if opts.removeArgs:
    action = removeFiles
  action(opts, files, logFile)


# Only run the uploader when this file is executed as a script, not when
# it is imported as a module.
if __name__=="__main__":
  main()

# vi:et:sw=2:ts=2:sta:ai
