#!/usr/bin/env python
# Copyright (c) 2012 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

import collections
import os, sys, re, gzip
import Tac

# The FetchLog utility uses this library to analyze logrotated files.
# It contains two parts:
# 1. A save operation that stores a snapshot of the current virtual file
#    that can be loaded later.
# 2. A dump operation that uses the saved snapshot to determine where the
#    new content starts.
#
# The assumptions are based on how logrotate works. A virtual log file
# starts off as a normal file (messages), gets appended to, gzipped
# (messages.gz), renamed (messages.1.gz) and deleted. Any files except
# the most recent (uncompressed) file should not be altered (but could be
# renamed or deleted) and therefore their inode numbers do not change.

# Snapshot of a VirtualFile as produced by VirtualFile.createCursor():
#   inodes          - inode number of every file, in the virtual file's order
#   firstLines      - first line of every file, in the same order
#   currentFileSize - size of the last (current) file when the snapshot
#                     was taken
#   runId           - opaque value passed in by the caller (may be None)
VirtualFileCursor = collections.namedtuple(
   "VirtualFileCursor",
   [ "inodes", "firstLines", "currentFileSize", "runId" ] )

class VirtualFile( object ):
   '''A VirtualFile is a list of log files corresponding to
   a certain name (e.g., Aaa-1234). Some of the files might be
   logrotated. The content of this virtual file is the
   concatenation of the uncompressed contents of all files in
   order of mtime. When log rotation happens, the beginning part
   of the virtual file might be removed.

   A cursor is an object that stores a certain state of a virtual
   file. By moving a virtual file to a specific cursor, we can
   force the virtual file to return contents written after the cursor.
   '''
   def __init__( self, name, files, logdir ):
      '''name is the virtual file's name, files is a non-empty list of
      file names (relative to logdir) that make up the virtual file.'''
      assert files
      self.name_ = name
      self.logdir_ = logdir
      # Oldest file first; the last entry is the current file.
      self.files_ = self._fileSort( files )
      # Read position: index into files_ plus an offset within that file.
      self.currentFileIndex_ = 0
      self.currentFilePos_ = 0
      self.cursor_ = None

   def name( self ):
      '''Return the name of this virtual file.'''
      return self.name_

   def createCursor( self, runId=None ):
      '''Return a VirtualFileCursor object based on the current state
      of this virtual file (EOF).

      Errors raised while stat-ing or reading the underlying files are
      propagated to the caller.'''
      inodes = [ os.stat( self._path( x ) ).st_ino for x in self.files_ ]
      firstLines = [ self._readFirstLine( x ) for x in self.files_ ]
      currentFileSize = os.stat( self._path( self.files_[ -1 ] ) ).st_size
      return VirtualFileCursor( inodes, firstLines, currentFileSize, runId )

   def _fileSort( self, files ):
      '''Return the files sorted by modification time (oldest first).'''
      return sorted( files, key=lambda x: os.stat( self._path( x ) ).st_mtime )

   def _path( self, name ):
      '''Return the path of the file called name inside the log directory.'''
      return '%s/%s' % ( self.logdir_, name )

   def _open( self, filename, pos=0 ):
      '''Open the file and seek to the position. Files ending in .gz are
      opened through gzip. The caller is responsible for closing the
      returned file object.'''
      if filename.endswith( '.gz' ):
         openfunc = gzip.open
      else:
         openfunc = open
      f = openfunc( self._path( filename ), 'rb' )
      if pos:
         try:
            f.seek( pos, 0 )
         except Exception:
            # Do not leak the handle if we cannot hand it to the caller.
            f.close()
            raise
      return f

   def _readFirstLine( self, filename ):
      '''Return the first line of the file, closing it afterwards.
      Errors are propagated.'''
      with self._open( filename ) as f:
         return f.readline()

   def _firstLine( self, filename ):
      '''Return the first line of the file, or '' if it cannot be read.'''
      try:
         return self._readFirstLine( filename )
      except IOError:
         return ''

   def currentFile( self ):
      '''Return the name of the most recently modified file.'''
      return self.files_[ -1 ]

   def files( self ):
      '''Return all file names, sorted by modification time.'''
      return self.files_

   def mtime( self ):
      '''Returns a mtime that corresponds to the current file.'''
      return os.stat( self._path( self.currentFile() ) ).st_mtime

   def _startingFilePos( self, currentIndex, cursor ):
      '''Given the index of a file in files_, return the starting position
      to read from. Return -1 if the file should be skipped.
      A false cursor means "no saved state": read from the beginning.
      '''
      if not cursor:
         return 0

      filename = self.files_[ currentIndex ]
      isCurrentFile = ( currentIndex == len( self.files_ ) - 1 )
      inode = os.stat( self._path( filename ) ).st_ino

      if isCurrentFile:
         # This is the current file - we should dump everything unless
         # it has the same inode number as the current file in the cursor
         # in which case we start with the cursor's current file size.
         #
         # Example: messages -> messages with more content
         if inode == cursor.inodes[ -1 ]:
            return cursor.currentFileSize
         return 0

      # This is a logrotated file - there are several cases.
      # 1. it matches one of the cursor's logrotated files (ignore)
      # 2. it matches the cursor's current file (start with current size)
      # 3. it doesn't match anything (start with 0)

      # Since inode number can be reused in case files are deleted,
      # we match both inode and first line of content.
      try:
         inodeIndex = cursor.inodes[ :-1 ].index( inode )
      except ValueError:
         inodeIndex = -1
      firstLine = self._firstLine( filename )

      if inodeIndex >= 0 and cursor.firstLines[ inodeIndex ] == firstLine:
         # This file is a logrotated file in the cursor,
         # so it should be ignored.
         return -1
      if cursor.firstLines[ -1 ] == firstLine:
         # If the logrotated file matches the first line of the cursor's
         # current file, use the cursor's current file size. Note if the
         # cursor's current file was logrotated, the inode number would have
         # changed, so we cannot depend on it being the same.
         return cursor.currentFileSize
      # For everything else, just dump everything. This can happen if
      # logrotate happened too fast and all cursor's files were deleted.
      return 0

   def tell( self ):
      '''Return the cursor last passed to seek() (None if never seeked).'''
      return self.cursor_

   def seek( self, cursor ):
      '''Move file pointer by the saved file cursor: skip every leading
      file that the cursor already covers and remember the offset in the
      first file that has content to return.'''
      self.cursor_ = cursor
      currentIndex = 0
      currentPos = 0
      while currentIndex < len( self.files_ ):
         currentPos = self._startingFilePos( currentIndex, cursor )
         if currentPos >= 0:
            break
         currentIndex += 1
      self.currentFileIndex_ = currentIndex
      self.currentFilePos_ = currentPos

   def empty( self ):
      '''Whether the file is empty, i.e. the read position is past the
      last file or at/after the end of the current file.'''
      return self.currentFileIndex_ >= len( self.files_ ) or \
          ( self.currentFileIndex_ == len( self.files_ ) - 1 and
            self.currentFilePos_ >= os.stat(
                self._path( self.files_[ -1 ] ) ).st_size )

   def __iter__( self ):
      '''Yield the remaining content, one chunk per underlying file.

      Instead of returning one line at a time we return the whole file
      for better performance. A file that cannot be read contributes a
      "! IOError: ..." line instead of its content.'''
      startPos = self.currentFilePos_
      for filename in self.files_[ self.currentFileIndex_: ]:
         try:
            # Read and close before yielding so no handle stays open while
            # the generator is suspended.
            with self._open( filename, startPos ) as f:
               chunk = f.read()
         except IOError as e:
            chunk = "! IOError: %s\n" % e
         yield chunk
         # Only the first file is read from an offset.
         startPos = 0
      # Falling off the end finishes the generator; an explicit
      # "raise StopIteration" here is an error under PEP 479.

   def read( self ):
      '''Return all remaining content as one string.'''
      return ''.join( self )

   def readlines( self ):
      '''Return all remaining content as a list of lines without line
      terminators.'''
      return self.read().splitlines()

   def readTail( self, maxsize ):
      '''Read from the end of the file up to maxsize bytes.

      If the remaining content is longer than maxsize, only its last
      maxsize bytes are returned, prefixed with a "truncated" notice.
      Note this may advance the read position of this virtual file.'''
      # Move file's cursor on a first approximation so we don't end up
      # reading files we don't need. The sizes used are those reported by
      # os.stat for the files on disk.
      totalSize = 0
      for i in range( len( self.files_ ) - 1, self.currentFileIndex_, -1 ):
         totalSize += os.stat( self._path( self.files_[ i ] ) ).st_size
         if totalSize > maxsize:
            # Reset the cursor
            self.currentFileIndex_ = i
            self.currentFilePos_ = 0
            break

      data = self.read()
      if len( data ) > maxsize:
         # Slice from an explicit start index: data[ -maxsize: ] would
         # return everything when maxsize is 0.
         data = ( "... [truncated to last %d bytes] ...\n" % maxsize +
                  data[ len( data ) - maxsize: ] )
      return data

# Default pattern used by logFiles() when no match is given: the leading
# run of non-dot characters of a file name, so that files which differ only
# after the first dot (e.g. messages, messages.1.gz) share the same name.
defaultMatch = r'[^\.]*'

def logFiles( logdir, match=None ):
   '''Group the entries of logdir by log name and return a dictionary
   of name : VirtualFile.

   match is a regular expression applied to the start of each entry; it
   defaults to defaultMatch. An unreadable directory yields {}.'''
   try:
      entries = os.listdir( logdir )
   except OSError:
      return {}

   pattern = re.compile( match if match else defaultMatch )

   grouped = {}
   for entry in entries:
      found = pattern.match( entry )
      if not found:
         continue
      # Use the named group 'name' when the pattern defines one,
      # otherwise fall back to the whole matched string.
      key = found.groupdict().get( 'name', found.group( 0 ) )
      if not key:
         continue
      grouped.setdefault( key, [] ).append( entry )

   result = {}
   for name, members in grouped.items():
      result[ name ] = VirtualFile( name, members, logdir )
   return result

def _saveVirtualFiles( vfiles, savefile, runId=None ):
   '''Write a snapshot of the given virtual files.

   vfiles is a dictionary of name : virtual file. The snapshot is the
   repr() of a dictionary of name : cursor, written to savefile, or to
   stdout when savefile is None. Virtual files whose cursor cannot be
   created are left out; if no cursor could be created nothing is written.
   '''
   snapshot = {}
   for name, vfile in vfiles.items():
      try:
         snapshot[ name ] = vfile.createCursor( runId )
      except ( IOError, OSError ):
         # Best effort: a file may have been removed or become unreadable
         # since it was listed (os.stat reports that as OSError).
         continue

   if not snapshot:
      return

   content = repr( snapshot )
   if savefile is None:
      sys.stdout.write( content )
   else:
      # Close the file explicitly so the snapshot is flushed to disk
      # before anyone reads it back.
      with open( savefile, "w+" ) as output:
         output.write( content )

def save( logdir, savefile, match=None ):
   '''Snapshot the current state of the log files in logdir that are
   selected by match and store it in savefile.'''
   _saveVirtualFiles( logFiles( logdir, match ), savefile )

def dump( logdir, savefile, match=None, runId=None ):
   '''Dump contents after the saved file.
   It returns a dictionary of { name: virtual files }

   When savefile is given, each virtual file that has an entry in the
   saved snapshot is moved to its saved cursor, the savefile is rewritten
   with the current state (tagged with runId), and virtual files that
   are empty and not modified since the previous savefile are dropped
   from the result. A missing savefile is ignored; an invalid one is
   reported on stdout.
   '''
   # Get current log files
   files = logFiles( logdir, match )
   if not savefile:
      return files

   saveTime = 0
   content = ''
   try:
      with open( savefile ) as saved:
         content = saved.read()
      # NOTE(security): eval executes whatever the savefile contains. The
      # file is expected to hold the repr() written by _saveVirtualFiles;
      # it must not be writable by untrusted users.
      snapshot = eval( content ) # pylint: disable=eval-used
      for name, cursor in snapshot.items():
         if name in files:
            files[ name ].seek( cursor )
      saveTime = os.stat( savefile ).st_mtime
   except IOError:
      # No savefile? ignore
      pass
   except ( SyntaxError, TypeError, ValueError, NameError, AttributeError ):
      # NameError/AttributeError cover content that parses but does not
      # evaluate to a dictionary of cursors.
      print( "! %s is not a valid savefile (%r)" % (
         savefile, content[ :1024 ] ) )

   # update the existing savefile
   _saveVirtualFiles( files, savefile, runId )

   # Filter out files that have not changed after the last savefile,
   # so we don't print out old empty files.
   if saveTime:
      stale = [ n for n, f in files.items()
                if f.empty() and f.mtime() < saveTime ]
      for name in stale:
         del files[ name ]
   return files
