// Appendix B


//
// Sample code to read the text out of a PowerPoint '97 presentation.
//
#include <ole2.h>
#include <stdio.h>
#include <time.h>

// Stolen from app\sertypes.h
// System dependent sizes: the byte widths quoted in the comments below hold
// only where short is 2 bytes and long is 4 bytes -- TODO confirm for any
// target other than the one this sample was written for.
// 
typedef signed long sint4; // signed 4-byte integral value
typedef signed short sint2; // signed 2-byte integral value
typedef unsigned long uint4; // unsigned 4-byte integral value
typedef unsigned short uint2; // unsigned 2-byte integral value
typedef char bool1; // 1-byte boolean
typedef unsigned char ubyte1; // unsigned byte value
typedef uint2 psrType; // type of RecordHeader::recType
typedef uint4 psrSize; // type of RecordHeader::recLen; each record is preceded by
// a psrType and a psrSize (see RecordHeader).
typedef uint2 psrInstance; // type of RecordHeader::recInstance
typedef uint2 psrVersion; // type of RecordHeader::recVer
typedef uint4 psrReference; // Saved object reference
#define PSFLAG_CONTAINER 0xFF // If the version field of a record
                              // header takes on this value, the
                              // record header marks the start of
                              // a container.
                              // NOTE(review): RecordHeader::recVer is declared
                              // as a 4-bit field, so it can hold at most 0x0F
                              // and a direct comparison with 0xFF cannot
                              // succeed; the parsers in this file additionally
                              // test (recVer & 0x0F) == 0x0F.
// PowerPoint97 Record Header
typedef unsigned long DWord; // general-purpose unsigned count/offset type used throughout this file

// Hook called by the Assert macro when a checked expression is false.
//   file, line - source location of the failing Assert
//   expr       - text of the failing expression
// This sample ignores all three arguments and always returns TRUE.
int AssertionFailed( const char* file, int line, const char* expr )
{
   // AR: Message box the assert
   const int result = TRUE;
   return result;
} /* AssertionFailed */

// Assert( expr ): calls AssertionFailed( file, line, "expr" ) when expr is false.
//
// The body is wrapped in do { ... } while ( 0 ) so that "Assert( x );" is a
// single statement and can be used safely as the unbraced branch of an
// if/else.  The expression is tested with !(expr) rather than being cast to
// int first, so pointer arguments are neither truncated nor rejected on
// targets where a pointer is wider than int.
#define Assert( expr ) \
do \
{ \
   if ( !(expr) ) \
      AssertionFailed( __FILE__, __LINE__, #expr ); \
} while ( 0 ) /* Assert */

// Forward declaration of the file-level text reader defined near the end of
// this file.  (The earlier three-parameter prototype matched no definition.)
//   ppContext  - in/out cursor; *ppContext must be NULL on the first call and
//                is reset to NULL once the function returns FALSE
//   pStgFrom   - storage of the presentation being read
//   buffer     - receives up to bufferSize characters
//   pSizeRet   - receives the number of characters actually stored in buffer
// Returns TRUE if more text exists.
static BOOL ReadText( void** ppContext, IStorage* pStgFrom, WCHAR* buffer, unsigned long bufferSize, unsigned long* pSizeRet );

// Header read from the stream in front of every record; the readers in this
// file pull it in with one Read of sizeof(RecordHeader) bytes.
struct RecordHeader
{
   psrVersion recVer : 4; // 4-bit version field; the parsers treat
                          // (recVer & 0x0F) == 0x0F as "this is a container".
                          // Being 4 bits wide it cannot hold PSFLAG_CONTAINER (0xFF).
   psrInstance recInstance : 12; // 12-bit instance field (not examined in this file)
   psrType recType; // record type, compared against the PST_* constants
   psrSize recLen; // length in bytes of the data that follows the header
};

// Record body read from the "Current User" stream by
// FileReader::ReadCurrentUser, which uses it to decide whether the storage is
// a PowerPoint file.
// NOTE(review): the structure is read as raw bytes and its sizeof is compared
// with the size field, so the result depends on compiler padding -- confirm
// packing on the target compiler.
struct PSR_CurrentUserAtom
{
   uint4 size; // must equal sizeof(PSR_CurrentUserAtom) for the file to be accepted
   uint4 magic; // Magic number to ensure this is a PowerPoint file (HEADER_MAGIC_NUM).
   uint4 offsetToCurrentEdit; // Offset in main stream to current edit field;
                              // starting point for ReadPersistDirectory.
   uint2 lenUserName; // must be <= 255 for the file to be accepted
   uint2 docFileVersion;
   ubyte1 majorVersion;
   ubyte1 minorVersion;
};

// Body of a PST_UserEditAtom record, read as raw bytes by
// FileReader::PPSReadUserEditAtom.  ReadPersistDirectory follows
// offsetLastEdit from one of these to the next until it reaches 0.
struct PSR_UserEditAtom
{
   sint4 lastSlideID; // slideID
   uint4 version; // This is major/minor/build which did the edit
   uint4 offsetLastEdit; // File offset of last edit; 0 ends the chain
   uint4 offsetPersistDirectory; // Offset to PersistPtrs for
                                 // this file version.
   uint4 documentRef; // persist reference resolved through
                      // PPSPersistDirectory::GetPersistObjStreamPos to
                      // locate the document record
   uint4 maxPersistWritten; // Addr of last persist ref written to the file (max seen so far).
   sint2 lastViewType; // enum view type
};

// Body of a PST_SlidePersistAtom record, read as raw bytes by
// FileReader::ParseForSlideLists.  Only psrReference is used by this file.
struct PSR_SlidePersistAtom
{
   uint4 psrReference; // persist reference of the slide; handed to AddSlideToList
   uint4 flags;
   sint4 numberTexts;
   sint4 slideId;
   uint4 reserved;
};

// Names of the two streams this reader opens inside the storage.
#define CURRENT_USER_STREAM L"Current User"
#define DOCUMENT_STREAM L"PowerPoint Document"

// Value PSR_CurrentUserAtom::magic must hold for the file to be accepted.
#define HEADER_MAGIC_NUM -476987297

// Record types (RecordHeader::recType) recognised by this reader,
// listed in ascending numeric order.
const int PST_SlidePersistAtom = 1011;
const int PST_TextCharsAtom = 4000; // Unicode in text
const int PST_TextBytesAtom = 4008; // non-unicode text
const int PST_UserEditAtom = 4085;
const int PST_PersistPtrIncrementalBlock = 6002; // Incremental diffs on persists

// Defined further down; FileReader only holds a pointer to it.
class PPSPersistDirectory;

struct ParseContext
{
   ParseContext(ParseContext *pNext) : m_pNext(pNext), m_nCur(0)
   {
   }
   RecordHeader m_rh;
   uint4 m_nCur;
   ParseContext *m_pNext;
};

const int SLIDELISTCHUNKSIZE=32;
struct SlideListChunk
{
   SlideListChunk( SlideListChunk* next, psrReference newOne ) :
   pNext( next ), numInChunk(1)
   {
      refs[0] = newOne;
   }
   SlideListChunk *pNext;
   DWord numInChunk;
   psrReference refs[SLIDELISTCHUNKSIZE];
};

// Reads the text of a PowerPoint '97 presentation held in an IStorage.
// Typical use (see the file-level ReadText): construct, check IsPowerPoint(),
// call ReadPersistDirectory() and ReadSlideList(), then call ReadText()
// repeatedly until it returns FALSE.
class FileReader
{
public:
   // AddRefs pStg, opens its "Current User" stream and records whether the
   // storage looks like a PowerPoint file.
   FileReader(IStorage *pStg);
   ~FileReader();
   BOOL ReadText( WCHAR *pBuff, ULONG size, ULONG *pSizeRet );
// Reads next size chars from file. Returns TRUE if there is more
// text to read.
   BOOL IsPowerPoint()
   {
      return m_isPP;
   } // Returns true if this is a PowerPoint '97 file.
   // Walks the chain of user-edit atoms and builds m_pPersistDirectory.
   void ReadPersistDirectory();
   // Reads the PSR_UserEditAtom stored at stream offset 'offset' into userEdit.
   void PPSReadUserEditAtom( DWord offset, PSR_UserEditAtom& userEdit );
   // Seeks to the document record and collects the slide references in it.
   void ReadSlideList();
protected:
   // Reads and validates the current-user record from pStm.
   BOOL ReadCurrentUser(IStream *pStm);
   // Reads one record header and, for atoms, the record body.
   void *ReadRecord( RecordHeader& rh );
   // Continues parsing the current slide; TRUE means the client buffer is full.
   BOOL Parse();
   // Returns the "PowerPoint Document" stream, opening it on first use.
   IStream *GetDocStream();
   // Always FALSE in this sample: no record type is read by the client.
   BOOL DoesClientRead( psrType type )
   {
      return FALSE;
   }
   // Frees a buffer returned by ReadRecord and marks the header as consumed.
   void ReleaseRecord( RecordHeader& rh, void* diskRecBuf );
   // Recursively scans records for PST_SlidePersistAtom entries.
   DWord ParseForSlideLists();
   // Stores one slide reference in the chunk list.
   void AddSlideToList( psrReference refToAdd );
   // Seeks to 'offset', starts a new parse-context stack and calls Parse().
   BOOL StartParse( DWord offset );
   // Copies pending text into the client buffer; TRUE when that buffer is full.
   BOOL FillBufferWithText();
   // Produces the stream offset of the next record to parse for text.
   BOOL FindNextSlide( DWord& offset );
private:
   PSR_CurrentUserAtom m_currentUser;        // record read from the "Current User" stream
   IStream * m_pDocStream;                   // cached document stream (see GetDocStream)
   IStorage * m_pPowerPointStg;              // storage being read; AddRef'd in the constructor
   BOOL m_isPP;                              // TRUE once ReadCurrentUser accepted the file
   ParseContext* m_pParseContexts;           // top of the container stack used by Parse
   WCHAR* m_pCurText;                        // text of the text atom being handed out, or NULL
   unsigned long m_curTextPos;               // next character of m_pCurText to copy
   unsigned long m_curTextLength;            // number of characters in m_pCurText
   PSR_UserEditAtom* m_pLastUserEdit;        // copy of the first user-edit atom read
   PPSPersistDirectory* m_pPersistDirectory; // maps persist references to stream offsets
   SlideListChunk* m_pFirstChunk;            // head of the slide reference list
   int m_curSlideNum;                        // counter advanced by FindNextSlide
   WCHAR* m_pClientBuf;                      // caller's buffer for the current ReadText call
   unsigned long m_clientBufSize;            // capacity of m_pClientBuf in characters
   unsigned long m_clientBufPos;             // next free position in m_pClientBuf
   ULONG* m_pSizeRet;                        // caller's "characters stored" counter
};

// Takes a reference on pStg and checks its "Current User" stream to decide
// whether the storage holds a PowerPoint file (see IsPowerPoint()).
//
// Every member is initialised, in declaration order; in particular
// m_curTextLength and m_pSizeRet, which FillBufferWithText reads, no longer
// start out with indeterminate values.
FileReader::FileReader(IStorage *pStg) :
m_currentUser(),
m_pDocStream( NULL ),
m_pPowerPointStg(pStg),
m_isPP(FALSE),
m_pParseContexts(NULL),
m_pCurText( NULL ),
m_curTextPos(0),
m_curTextLength(0),
m_pLastUserEdit( NULL ),
m_pPersistDirectory( NULL ),
m_pFirstChunk( NULL ),
m_curSlideNum(0),
m_pClientBuf( NULL ),
m_clientBufSize( 0 ),
m_clientBufPos( 0 ),
m_pSizeRet( NULL )
{
   IStream *pStm = NULL;
   m_pPowerPointStg->AddRef();
   HRESULT hr = pStg->OpenStream( CURRENT_USER_STREAM, NULL, STGM_READ | STGM_DIRECT | STGM_SHARE_EXCLUSIVE, NULL, &pStm );
   // Only touch the stream when it was actually opened: releasing it
   // unconditionally dereferenced NULL for any storage that has no
   // "Current User" stream.
   if ( SUCCEEDED(hr) && pStm != NULL )
   {
      if ( ReadCurrentUser(pStm) )
         m_isPP = TRUE;
      pStm->Release();
   }
}

// Releases everything the reader acquired: the parse-context stack, the slide
// reference list, any text not yet handed to the client, the saved user-edit
// atom, the document stream and finally the storage reference taken by the
// constructor.
FileReader::~FileReader()
{
   while ( m_pParseContexts != NULL )
   {
      ParseContext* pContext = m_pParseContexts;
      m_pParseContexts = pContext->m_pNext;
      delete pContext;
   }
   while ( m_pFirstChunk != NULL )
   {
      SlideListChunk* pChunk = m_pFirstChunk;
      m_pFirstChunk = pChunk->pNext;
      delete pChunk;
   }
   delete [] m_pCurText;
   m_pCurText = NULL;
   delete m_pLastUserEdit;
   m_pLastUserEdit = NULL;
   // NOTE(review): m_pPersistDirectory is deliberately not deleted here.
   // PPSPersistDirectory is only forward-declared at this point in the file,
   // and deleting through a pointer to an incomplete type would skip its
   // destructor.  Freeing it requires this destructor to be defined after
   // the class.
   if ( m_pDocStream != NULL )
   {
      m_pDocStream->Release();
      m_pDocStream = NULL;
   }
   m_pPowerPointStg->Release();
}

// Moves as much pending text as fits from m_pCurText into the client buffer,
// frees m_pCurText once it has been used up, and adds the number of characters
// moved to *m_pSizeRet.
// Returns TRUE when the client buffer is full.
BOOL FileReader::FillBufferWithText()
{
   const unsigned long textLeft = m_curTextLength - m_curTextPos;
   const unsigned long roomLeft = m_clientBufSize - m_clientBufPos;
   const unsigned long amtToCopy = ( textLeft < roomLeft ) ? textLeft : roomLeft;

   for ( unsigned long i = 0; i < amtToCopy; ++i )
      m_pClientBuf[ m_clientBufPos + i ] = m_pCurText[ m_curTextPos + i ];
   m_clientBufPos += amtToCopy;
   m_curTextPos += amtToCopy;
   *m_pSizeRet += amtToCopy;

   // All of the current text has been handed out: drop it.
   if ( m_curTextPos == m_curTextLength )
   {
      delete [] m_pCurText;
      m_pCurText = NULL;
      m_curTextLength = 0;
      m_curTextPos = 0;
   }

   const BOOL clientBufferFull = ( m_clientBufSize == m_clientBufPos );
   return clientBufferFull;
}

// Appends refToAdd to the slide reference list.
//
// References are kept in the order they were added: the last chunk is filled
// first and a new chunk is linked at the tail when it is full.  (Linking new
// chunks at the head made FindNextSlide, which walks the list from
// m_pFirstChunk, return the most recently added chunk first once more than
// SLIDELISTCHUNKSIZE slides had been added.)
void FileReader::AddSlideToList( psrReference refToAdd )
{
   // Find the last chunk and the link that a new chunk would hang from.
   SlideListChunk** ppLink = &m_pFirstChunk;
   SlideListChunk* pLast = NULL;
   while ( *ppLink != NULL )
   {
      pLast = *ppLink;
      ppLink = &pLast->pNext;
   }

   if ( pLast != NULL && pLast->numInChunk < (DWord) SLIDELISTCHUNKSIZE )
   {
      pLast->refs[pLast->numInChunk] = refToAdd;
      pLast->numInChunk++;
      return;
   }

   // No chunk yet, or the last one is full.
   *ppLink = new SlideListChunk( NULL, refToAdd );
}

// Returns the "PowerPoint Document" stream, opening it the first time it is
// needed.  Returns NULL if the storage was not recognised as PowerPoint or if
// the stream cannot be opened (an error is written to stderr in that case).
IStream *FileReader::GetDocStream()
{
   // Already open: hand back the cached stream.
   if ( m_pDocStream != NULL )
      return m_pDocStream;

   if ( !m_isPP )
      return NULL;

   const HRESULT hr = m_pPowerPointStg->OpenStream( DOCUMENT_STREAM, NULL, STGM_READ | STGM_DIRECT | STGM_SHARE_EXCLUSIVE, NULL, &m_pDocStream );
   if ( SUCCEEDED(hr) )
      return m_pDocStream;

   fprintf(stderr,"Error (%d) opening PowerPoint Document Stream.\n",(int)hr);
   return NULL;
}

// Reads the record header and PSR_CurrentUserAtom from pStm into
// m_currentUser and validates it.
// Returns TRUE only when both reads delivered the full number of bytes and the
// atom's size, magic number and user-name length are acceptable.
BOOL FileReader::ReadCurrentUser(IStream *pStm)
{
   if ( pStm == NULL )
      return FALSE;

   // A read can succeed yet deliver fewer bytes than requested, so the byte
   // count is checked for the header as well as for the atom; a failed or
   // short read must never fall through to validating stale data.
   ULONG nRd = 0;
   RecordHeader rh;
   if ( FAILED( pStm->Read(&rh, sizeof(rh), &nRd) ) || nRd != sizeof(rh) )
      return FALSE;

   nRd = 0;
   if ( FAILED( pStm->Read(&m_currentUser, sizeof(PSR_CurrentUserAtom), &nRd) ) ||
        nRd != sizeof(PSR_CurrentUserAtom) )
      return FALSE;

   const BOOL isPP = ( m_currentUser.size == sizeof( m_currentUser ) )&&
                     ( m_currentUser.magic == (uint4) HEADER_MAGIC_NUM )&&
                     ( m_currentUser.lenUserName <= 255 );
   return isPP;
}

// One block of persist-directory data: a private copy of the DWord table of a
// persist-pointer record plus a link to the next entry.  Only
// PPSPersistDirectory (a friend) can create or inspect entries.
class PPSDirEntry
{
   PPSDirEntry()
   : m_pNext( NULL ), m_pOffsets( NULL ), m_tableSize( 0 )
   {
   }
   PPSDirEntry* m_pNext;   // next entry in the directory, or NULL
   DWord* m_pOffsets;      // table allocated with new DWord[m_tableSize]
   DWord m_tableSize;      // number of DWords in m_pOffsets
public:
   ~PPSDirEntry()
   {
      // The table is allocated with new[] in PPSPersistDirectory::AddEntry,
      // so it has to be released with delete[].
      delete [] m_pOffsets;
      m_pOffsets = NULL;
   }
   friend class PPSPersistDirectory;
};

// class PPSDirEntry
// Linked list of PPSDirEntry tables that maps a persist reference to the
// stream offset stored for it.
class PPSPersistDirectory
{
private:
   PPSDirEntry* m_pFirstDirEntry; // head of the entry list, or NULL when empty

public:
   PPSPersistDirectory();
   ~PPSPersistDirectory();

   // Appends a copy of the cOffsets DWords at pOffsets as a new entry.
   void AddEntry( DWord cOffsets, DWord* pOffsets );

   // Looks up the stream offset stored for ref; (DWord) -1 when not found.
   DWord GetPersistObjStreamPos( DWord ref );

   // Counts the references described by all entries.
   DWord NumberOfAlreadySavedPersists();
};

// Creates an empty directory.
PPSPersistDirectory::PPSPersistDirectory()
{
   m_pFirstDirEntry = NULL;
}

// Deletes every entry in the list and leaves the directory empty.
PPSPersistDirectory::~PPSPersistDirectory()
{
   PPSDirEntry* pDoomed = m_pFirstDirEntry;
   while ( pDoomed != NULL )
   {
      PPSDirEntry* pRest = pDoomed->m_pNext;
      delete pDoomed;
      pDoomed = pRest;
   }
   m_pFirstDirEntry = NULL;
}

// Copies the cOffsets DWords at pOffsets into a new entry and links that entry
// at the end of the list, so entries stay in the order they were added.
void PPSPersistDirectory::AddEntry( DWord cOffsets, DWord* pOffsets )
{
   // Build the new entry with its own copy of the table.
   PPSDirEntry* pFresh = new PPSDirEntry();
   pFresh->m_tableSize = cOffsets;
   pFresh->m_pOffsets = new DWord[cOffsets];
   memcpy( pFresh->m_pOffsets, pOffsets, cOffsets * sizeof( DWord ) );

   // Empty list: the new entry becomes the head.
   if ( m_pFirstDirEntry == NULL )
   {
      m_pFirstDirEntry = pFresh;
      return;
   }

   // Otherwise walk to the current tail and hang the entry there.
   PPSDirEntry* pTail = m_pFirstDirEntry;
   while ( pTail->m_pNext != NULL )
      pTail = pTail->m_pNext;
   pTail->m_pNext = pFresh;
}

// Returns the stream offset recorded for persist reference ref, or (DWord) -1
// when no entry describes it.
//
// Each entry's table is a sequence of blocks.  The first DWord of a block
// holds the number of references in its upper bits (value >> 20) and the first
// reference number in its low 20 bits; that many offsets follow.  Entries are
// searched in list order and the first match wins.
//
// A block whose count would run past the end of its table (corrupt file) is
// not read; the search moves on to the next entry instead of indexing out of
// bounds.
DWord PPSPersistDirectory::GetPersistObjStreamPos( DWord ref )
{
   for ( PPSDirEntry* pEntry = m_pFirstDirEntry; pEntry != NULL; pEntry = pEntry->m_pNext )
   {
      const DWord* pTable = pEntry->m_pOffsets;
      const DWord cTable = pEntry->m_tableSize;
      DWord iBlock = 0; // index of the current block's header DWord
      while ( iBlock < cTable )
      {
         const DWord nRefs = pTable[iBlock] >> 20;
         const DWord base = pTable[iBlock] & 0xFFFFF; // 1-based
         if ( nRefs > cTable - iBlock - 1 )
            break; // block claims more offsets than the table holds
         if ( ( base <= ref )&&( ref - base < nRefs ) )
            return pTable[ iBlock + 1 + ( ref - base ) ];
         iBlock += nRefs + 1;
      }
   }
   return(DWord) -1;
}

// Returns the total number of references described by all blocks of all
// entries (the sum of every block's reference count).
//
// The table is walked by index.  The previous loop condition subtracted the
// pointers in the wrong order and compared an element count with a byte count,
// so it stopped after the first block of each entry.  A block whose count
// would run past the end of its table is not counted.
DWord PPSPersistDirectory::NumberOfAlreadySavedPersists()
{
   DWord count = 0;
   for ( PPSDirEntry* pEntry = m_pFirstDirEntry; pEntry != NULL; pEntry = pEntry->m_pNext )
   {
      const DWord* pTable = pEntry->m_pOffsets;
      const DWord cTable = pEntry->m_tableSize;
      DWord iBlock = 0; // index of the current block's header DWord
      while ( iBlock < cTable )
      {
         const DWord nRefs = pTable[iBlock] >> 20;
         if ( nRefs > cTable - iBlock - 1 )
            break; // block claims more offsets than the table holds
         count += nRefs;
         iBlock += nRefs + 1;
      }
   }
   return count;
}

// Reads the user-edit atom stored at stream offset 'offset' into userEdit.
//
// userEdit is zero-filled first and stays zero-filled if the document stream
// is unavailable, the seek or a read fails or comes up short, or the record at
// that offset is not a PST_UserEditAtom.  Callers therefore never see
// indeterminate data; a zero offsetLastEdit also ends the edit chain walked by
// ReadPersistDirectory.
void FileReader::PPSReadUserEditAtom( DWord offset, PSR_UserEditAtom& userEdit )
{
   userEdit = PSR_UserEditAtom();

   IStream *pStm = GetDocStream();
   if ( pStm == NULL )
      return;

   LARGE_INTEGER li;
   li.LowPart = offset;
   li.HighPart = 0;
   if ( FAILED( pStm->Seek(li,STREAM_SEEK_SET, NULL) ) )
      return;

   RecordHeader rh;
   ULONG nRd = 0;
   if ( FAILED( pStm->Read(&rh, sizeof(rh), &nRd) ) || nRd != sizeof(rh) )
      return;
   Assert( rh.recType == PST_UserEditAtom );
   Assert( rh.recLen == sizeof( PSR_UserEditAtom ) );
   if ( rh.recType != PST_UserEditAtom )
      return;

   nRd = 0;
   if ( FAILED( pStm->Read(&userEdit, sizeof(userEdit), &nRd) ) || nRd != sizeof(userEdit) )
      userEdit = PSR_UserEditAtom(); // discard a partially filled atom
}

// Reads the record header at the current stream position into rh and, for an
// atom, the record body that follows it.
//
// Return values:
// NULL and (rh.recVer & 0x0F) == 0x0F: no record was read in;
//    the record header indicated the start of a container.
// NULL and rh.recType == 0: the header itself could not be read
//    (rh has been zero-filled).
// NULL otherwise: client must read in record.
// non-NULL: buffer of rh.recLen bytes holding the record body; release it
//    with ReleaseRecord().
//
// The container test uses the low four bits of recVer, like the parsers in
// this file: recVer is a 4-bit field, so comparing it with PSFLAG_CONTAINER
// (0xFF) could never be true.
void *FileReader::ReadRecord( RecordHeader& rh )
{
   IStream *pStm = GetDocStream();
// read record header, verify
   ULONG nRd = 0;
   if ( pStm == NULL || FAILED( pStm->Read(&rh, sizeof(rh), &nRd) ) || nRd != sizeof(rh) )
   {
      rh = RecordHeader();
      return NULL;
   }
// if client will read, do not read in record
   if ( DoesClientRead( rh.recType ) )
      return NULL;
// If container, return NULL
   if ( (rh.recVer & 0x0F) == 0x0F )
      return NULL;
// Allocate buffer for disk record. Client must call ReleaseRecord() or
// pass the atom up to CObject::ConstructContents() which will
// then release it.
// The buffer is zero-filled so that a short read leaves defined contents.
   char* buffer = new char[rh.recLen]();
// read in record
   pStm->Read(buffer, rh.recLen, NULL);
// NOTE: ByteSwapping & versioning not done by this simple reader.
   return(buffer);
}

// Frees a buffer obtained from ReadRecord and marks the record as consumed by
// clearing rh.recType, so that a second call for the same header does nothing.
//
// The former extra test "rh.recVer != PSFLAG_CONTAINER" has been dropped: it
// compared a 4-bit field with 0xFF and was therefore always true.  Passing a
// NULL buffer is harmless because delete[] on a null pointer does nothing.
void FileReader::ReleaseRecord( RecordHeader& rh, void* diskRecBuf )
{
   if ( rh.recType != 0 )
      delete [] static_cast<char*>( diskRecBuf );
   rh.recType = 0; // consume the record so that record doesn't
// get processed again.
}

void FileReader::ReadPersistDirectory()
{
   if ( NULL != m_pLastUserEdit )
      return; // already read
   PSR_UserEditAtom userEdit;
   DWord offsetToEdit = m_currentUser.offsetToCurrentEdit;
   while ( 0 < offsetToEdit )
   {
      PPSReadUserEditAtom( offsetToEdit, userEdit );
      if ( NULL == m_pLastUserEdit )
      {
         m_pPersistDirectory = new PPSPersistDirectory();
         m_pLastUserEdit = new PSR_UserEditAtom;
         *m_pLastUserEdit = userEdit;
      }
      LARGE_INTEGER li;
      li.LowPart = userEdit.offsetPersistDirectory;
      li.HighPart = 0;
      GetDocStream()->Seek(li,STREAM_SEEK_SET, NULL); // AR: check that seek succeeded.
      RecordHeader rh;
      DWord *pDiskRecord = (DWord*) ReadRecord(rh);
      Assert( PST_PersistPtrIncrementalBlock == rh.recType );
      m_pPersistDirectory->AddEntry( rh.recLen / sizeof( DWord ), pDiskRecord );
      ReleaseRecord( rh, pDiskRecord );
      offsetToEdit = userEdit.offsetLastEdit;
   }
}

// PPStorage::ReadPersistDirectory
void FileReader::ReadSlideList()
{
   Assert( m_pLastUserEdit != NULL );
   DWord offsetToDoc = m_pPersistDirectory->GetPersistObjStreamPos( m_pLastUserEdit->documentRef );
   LARGE_INTEGER li;
   li.LowPart = offsetToDoc;
   li.HighPart = 0;
   GetDocStream()->Seek(li,STREAM_SEEK_SET, NULL);
   ParseForSlideLists();
}

// Parses the record at the current stream position, recursing into
// containers, and passes the reference of every PST_SlidePersistAtom found to
// AddSlideToList.
//
// Returns the number of bytes the record occupies in the stream (header
// included), or 0 if the record could not be read.  A successfully parsed
// record always reports at least sizeof(RecordHeader), so 0 is an unambiguous
// failure signal; a container stops scanning its children as soon as one of
// them reports 0, which keeps a truncated file from looping forever.
//
// The byte count used to be kept in the same variable that received the
// "bytes read" result of the atom-body read, so every slide persist atom was
// counted as sizeof(atom) + recLen instead of sizeof(header) + recLen and the
// enclosing containers ended early.
DWord FileReader::ParseForSlideLists()
{
   IStream *pStm = GetDocStream();
   if ( pStm == NULL )
      return 0;
   RecordHeader rh;
   ULONG nHdr = 0;
// Stack based parsing for SlideLists
   if ( FAILED( pStm->Read(&rh, sizeof(rh), &nHdr) ) || nHdr != sizeof(rh) )
      return 0;

   if ( (rh.recVer & 0x0F) == 0x0F )
   {
      // Container: parse children until its length is used up.
      DWord nCur = 0;
      while ( nCur < rh.recLen )
      {
         const DWord nChild = ParseForSlideLists();
         if ( nChild == 0 )
            break;
         nCur += nChild;
      }
      return (DWord) sizeof(rh) + nCur;
   }

   // Atom: pick up the slide reference if it is one, then skip whatever is
   // left of the body so the stream position matches the reported size.
   DWord nSkip = rh.recLen;
   if ( rh.recType == PST_SlidePersistAtom && rh.recLen >= sizeof( PSR_SlidePersistAtom ) )
   {
      PSR_SlidePersistAtom spa;
      ULONG nBody = 0;
      Assert( sizeof(spa) == rh.recLen );
      if ( FAILED( pStm->Read(&spa, sizeof(spa), &nBody) ) || nBody != sizeof(spa) )
         return 0;
      AddSlideToList( spa.psrReference );
      nSkip -= (DWord) sizeof(spa);
   }
   if ( nSkip > 0 )
   {
      LARGE_INTEGER li;
      li.LowPart = nSkip;
      li.HighPart = 0;
      if ( FAILED( pStm->Seek(li,STREAM_SEEK_CUR, NULL) ) )
         return 0;
   }
   return (DWord) sizeof(rh) + rh.recLen;
}

// Fills pBuff with up to 'size' characters of presentation text and stores the
// number of characters written in *pSizeRet.
// Returns TRUE when the buffer has been filled (more text may follow) and
// FALSE once there are no more slides to parse.
BOOL FileReader::ReadText( WCHAR *pBuff, ULONG size, ULONG *pSizeRet )
{
   // Remember the caller's buffer for FillBufferWithText.
   m_pClientBuf = pBuff;
   m_clientBufSize = size;
   m_clientBufPos = 0;
   m_pSizeRet = pSizeRet;
   *pSizeRet = 0;

   for ( ;; )
   {
      if ( m_pParseContexts != NULL )
      {
         // A slide is in progress: hand out pending text first, then
         // restart the parse where we left off.
         if ( m_pClientBuf && FillBufferWithText() )
            return TRUE;
         if ( Parse() )
            return TRUE;
         continue;
      }

      // No slide in progress: move on to the next one.
      DWord offset;
      if ( !FindNextSlide(offset) )
         return FALSE; // DONE parsing, no more slides
      if ( StartParse( offset ) )
         return TRUE;
   }
}

// Begins parsing the record stored at stream offset 'offset': reads its
// header, makes it the only frame of the parse-context stack and calls
// Parse().
// Returns Parse()'s result.  Returns FALSE with the stack left empty if the
// offset is the "not found" value (DWord) -1, the stream is unavailable, or
// the seek or header read fails; ReadText then simply moves on to the next
// slide.
BOOL FileReader::StartParse( DWord offset )
{
   IStream *pStm = GetDocStream();
   if ( pStm == NULL || offset == (DWord) -1 )
      return FALSE;

   LARGE_INTEGER li;
   li.LowPart = offset;
   li.HighPart = 0;
   if ( FAILED( pStm->Seek(li,STREAM_SEEK_SET, NULL) ) )
      return FALSE;

   // Read into a local first so that no frame is created for a record
   // whose header could not be read.
   RecordHeader rh;
   ULONG nRd = 0;
   if ( FAILED( pStm->Read(&rh, sizeof(RecordHeader), &nRd) ) || nRd != sizeof(RecordHeader) )
      return FALSE;

   m_pParseContexts = new ParseContext( NULL );
   m_pParseContexts->m_rh = rh;
   return Parse();
}

BOOL FileReader::Parse()
{
   IStream *pStm = GetDocStream();
   RecordHeader rh;
   DWord nRd=0;
   Assert( m_pParseContexts );
// Restarting a parse might complete a container so we test this initially.
   if ( m_pParseContexts->m_nCur >= m_pParseContexts->m_rh.recLen )
   {
      Assert( m_pParseContexts->m_nCur == m_pParseContexts->m_rh.recLen );
      ParseContext* pParseContext = m_pParseContexts;
      m_pParseContexts = m_pParseContexts->m_pNext;
      delete pParseContext;
   }
   do
   {
      pStm->Read(&rh, sizeof(RecordHeader), NULL);
      if ( ( rh.recVer != PSFLAG_CONTAINER ) && ( (rh.recVer & 0x0F)!=0x0F ) )
      {
         if ( rh.recType == PST_TextCharsAtom )
         {
            m_curTextPos = 0;
            m_curTextLength = rh.recLen/2;
            Assert( m_pCurText == NULL );
            m_pCurText = new WCHAR[rh.recLen/2];
            pStm->Read(m_pCurText, rh.recLen, &nRd);
            wprintf( L"-%s-\n", m_pCurText );
            if ( FillBufferWithText() )
               return TRUE; // Stop parsing if buffer is full, and return control to client
         }
         else if ( rh.recType == PST_TextBytesAtom )
         {
            Assert( m_pCurText == NULL );
            m_curTextPos = 0;
            m_curTextLength = rh.recLen;
            m_pCurText = new WCHAR[rh.recLen];
            pStm->Read(m_pCurText, rh.recLen, &nRd);
            char *pHack = (char *) m_pCurText;
            unsigned int back2 = rh.recLen*2-1;
            unsigned int back1 = rh.recLen-1;
            for (unsigned int i=0;i<rh.recLen;i++)
            {
               pHack[back2-1] = pHack[back1];
               pHack[back2] = 0;
               back2 -=2;
               back1--;
            }
            if ( FillBufferWithText() )
               return TRUE; // Stop parsing if buffer is full, and return control to client
         }
         else
         {
            LARGE_INTEGER li;
            ULARGE_INTEGER ul;
            li.LowPart = rh.recLen;
            li.HighPart = 0;
            pStm->Seek(li,STREAM_SEEK_CUR,&ul);
         }
         m_pParseContexts->m_nCur += rh.recLen;
         m_pParseContexts->m_nCur += sizeof( RecordHeader ); // Atom rh's add towards containing container's size.
      }
      else
      {
         m_pParseContexts = new ParseContext( m_pParseContexts );
         m_pParseContexts->m_rh = rh;
      }
      if ( m_pParseContexts->m_nCur >= m_pParseContexts->m_rh.recLen )
      {
         Assert( m_pParseContexts->m_nCur == m_pParseContexts->m_rh.recLen );
         ParseContext* pParseContext = m_pParseContexts;
         m_pParseContexts = m_pParseContexts->m_pNext;
         delete pParseContext;
      }
   } while ( m_pParseContexts && ( m_pParseContexts->m_nCur < m_pParseContexts->m_rh.recLen ) );
   return FALSE;
}

// Produces the stream offset of the next record to parse for text.
// The first call yields the document record; each later call yields the next
// reference in the slide list, walking the chunks from m_pFirstChunk.
// Returns FALSE when the list is exhausted, or when no persist directory has
// been read (previously that case dereferenced a NULL pointer, because Assert
// does not stop execution).  offset may be (DWord) -1 if the directory does
// not contain the reference.
BOOL FileReader::FindNextSlide( DWord& offset )
{
   Assert( m_pLastUserEdit != NULL );
   if ( m_pLastUserEdit == NULL || m_pPersistDirectory == NULL )
      return FALSE;

   if ( m_curSlideNum == 0 )
   {
      offset = m_pPersistDirectory->GetPersistObjStreamPos( m_pLastUserEdit->documentRef );
      m_curSlideNum++;
      return TRUE;
   }

   // curSlideNum is 1-based; reduce it to an index within the chunk that
   // holds the slide.
   uint4 curSlideNum = m_curSlideNum++;
   SlideListChunk *pCur = m_pFirstChunk;
   while ( pCur && ( curSlideNum > pCur->numInChunk ) )
   {
      curSlideNum -= pCur->numInChunk;
      pCur = pCur->pNext;
   }
   if ( pCur == NULL )
      return FALSE;
   offset = m_pPersistDirectory->GetPersistObjStreamPos( pCur->refs[curSlideNum-1] );
   return TRUE;
}

// Reads the next piece of text from the presentation in pStgFrom.
//   ppContext  - in/out cursor.  *ppContext must be NULL on the first call; a
//                FileReader is then created and stored there.  It is deleted
//                and *ppContext reset to NULL when this function returns
//                FALSE.
//   buffer     - receives up to bufferSize characters
//   pSizeRet   - receives the number of characters stored in buffer
// Returns TRUE while more text may follow.  Returns FALSE with *pSizeRet == 0
// if the storage is not a PowerPoint file.
//
// The reader is published through *ppContext only after it has been accepted,
// so a rejected file no longer leaves the caller holding a pointer to a
// deleted object.
static BOOL ReadText( void** ppContext, IStorage* pStgFrom, WCHAR* buffer, unsigned long bufferSize, unsigned long* pSizeRet )
{
   FileReader* pFI = (FileReader *)*ppContext;
   if ( pFI == NULL )
   {
      pFI = new FileReader( pStgFrom );
      if ( !pFI->IsPowerPoint() )
      {
         delete pFI;
         *pSizeRet = 0;
         return FALSE;
      }
      pFI->ReadPersistDirectory();
      pFI->ReadSlideList();
      *ppContext = pFI;
   }
   BOOL bRet = pFI->ReadText(buffer, bufferSize, pSizeRet);
   if ( !bRet )
   {
      delete pFI;
      *ppContext = NULL;
   }
   return bRet;
}

void main(int argc, char **argv)
{
   OLECHAR wc[256];
   HRESULT hr;
   IStorage *pStgFrom = NULL;
   if (argc < 2)
   {
      fprintf(stderr,"Usage dblock <file to be read>\n");
      exit(0);
   }
   MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, argv[1], -1, wc, 255);
   hr = StgOpenStorage(wc, NULL, STGM_READ | STGM_DIRECT |
                       STGM_SHARE_DENY_WRITE, NULL, 0, &pStgFrom);
   if (FAILED(hr))
   {
      fprintf(stderr,"Error (%d) opening docfile: %s\n",(int)hr,argv[1]);
   }
   else
   {
      WCHAR wcBuf[6];
      ULONG sizeUsed;
      BOOL fContinue = TRUE;
      void *pContext = NULL;
      while ( fContinue )
      {
         fContinue = ReadText( &pContext, pStgFrom, wcBuf, 5, &sizeUsed );
         wcBuf[sizeUsed] = 0;
         wprintf(L"-%s-\n", wcBuf);
      }
   }
}