Appendix B
//
// Sample code to read the text out of a PowerPoint '97 presentation.
//
#include <ole2.h>
#include <stdio.h>
#include <time.h>
// Stolen from app\sertypes.h
// System dependent sizes.
// NOTE(review): the byte widths quoted below assume a platform where
// `long` is 4 bytes and `short` is 2 bytes (e.g. Win32) -- confirm before
// building elsewhere.
//
typedef signed long sint4; // signed 4-byte integral value
typedef signed short sint2; // signed 2-byte integral value
typedef unsigned long uint4; // unsigned 4-byte integral value
typedef unsigned short uint2; // unsigned 2-byte integral value
typedef char bool1; // 1-byte boolean
typedef unsigned char ubyte1; // unsigned byte value
typedef uint2 psrType; // record type field of a RecordHeader
typedef uint4 psrSize; // each record is preceded by
// pssTypeType and pssSizeType.
typedef uint2 psrInstance; // base type of the RecordHeader instance bit-field
typedef uint2 psrVersion; // base type of the RecordHeader version bit-field
typedef uint4 psrReference; // Saved object reference
#define PSFLAG_CONTAINER 0xFF // If the version field of a record
// header takes on this value, the
// record header marks the start of
// a container.
// NOTE(review): RecordHeader::recVer is declared as a 4-bit field, so it can
// only ever hold the low nibble (0x0F) of this value; the parsers in this
// file additionally test `(recVer & 0x0F) != 0x0F`.
// General-purpose unsigned long used for stream offsets and counts in this file.
typedef unsigned long DWord;
// Hook called by the Assert macro when its expression evaluates to false.
// The three arguments identify the failing site (source file, line number
// and the text of the expression); this sample ignores them and always
// reports TRUE.
// AR: Message box the assert
int AssertionFailed( const char* file, int line, const char* expr )
{
    const int result = TRUE;
    return result;
} /* AssertionFailed */
// Assert( expr ): evaluates expr and calls AssertionFailed() with the
// current file, line and the stringised expression when it is false.
// The body is wrapped in do { ... } while ( 0 ) so that `Assert( x );`
// expands to exactly one statement and stays safe inside an unbraced
// if/else. Always terminate the invocation with a semicolon.
#define Assert( expr ) \
    do \
    { \
        static char _str[] = #expr; \
        \
        if( !(int)(expr) ) \
            AssertionFailed( __FILE__, __LINE__, _str ); \
    } while ( 0 ) /* Assert */
// Forward declaration of the file-level text reader defined later in this
// file. The prototype now matches that definition; the previous
// three-argument form declared a different static function that was never
// defined.
// ppContext  - in/out reader state; *ppContext must be NULL on the first call.
// pStgFrom   - storage to read from (used when a new reader is created).
// buffer     - receives the text.
// bufferSize - capacity of buffer, in WCHARs.
// pSizeRet   - receives the number of WCHARs actually written.
// Returns TRUE if more text exists. Fills buffer up to bufferSize. Actual
// size used is *pSizeRet.
static BOOL ReadText( void** ppContext, IStorage* pStgFrom, WCHAR* buffer, unsigned long bufferSize, unsigned long* pSizeRet );
// Header that this file reads from the stream in front of every record.
// It is filled by reading sizeof( RecordHeader ) raw bytes, so the field
// order and bit-field widths must not be changed.
struct RecordHeader
{
psrVersion recVer : 4; // may be PSFLAG_CONTAINER
// NOTE(review): only 4 bits wide, so at most the low nibble of
// PSFLAG_CONTAINER (0x0F) can be stored here.
psrInstance recInstance : 12; // 12-bit instance field; not interpreted in this file
psrType recType; // record type, compared against the PST_* constants
psrSize recLen; // number of bytes that follow the header (used as the read/skip size)
};
// Data read from the "Current User" stream immediately after a RecordHeader
// (see FileReader::ReadCurrentUser). It is filled by a raw read of
// sizeof( PSR_CurrentUserAtom ) bytes, so the layout must not be changed.
struct PSR_CurrentUserAtom
{
uint4 size; // checked against sizeof( PSR_CurrentUserAtom ) when validating the file
uint4 magic; // Magic number to ensure this is a PowerPoint file.
uint4 offsetToCurrentEdit; // Offset in main stream to current edit field.
uint2 lenUserName; // validated to be <= 255; the name itself is not read here
uint2 docFileVersion; // not interpreted in this file
ubyte1 majorVersion; // not interpreted in this file
ubyte1 minorVersion; // not interpreted in this file
};
// Body of a PST_UserEditAtom record, filled by a raw read in
// FileReader::PPSReadUserEditAtom, so the layout must not be changed.
// ReadPersistDirectory follows offsetLastEdit from one atom to the next
// until it reaches 0.
struct PSR_UserEditAtom
{
sint4 lastSlideID; // slideID
uint4 version; // This is major/minor/build which did the edit
uint4 offsetLastEdit; // File offset of last edit
uint4 offsetPersistDirectory; // Offset to PersistPtrs for
// this file version.
uint4 documentRef; // persist reference resolved through the persist directory to locate the document
uint4 maxPersistWritten; // Addr of last persist ref written to the file (max seen so far).
sint2 lastViewType; // enum view type
};
// Body of a PST_SlidePersistAtom record, filled by a raw read in
// FileReader::ParseForSlideLists, so the layout must not be changed.
struct PSR_SlidePersistAtom
{
uint4 psrReference; // persist reference of the slide; stored in the slide list and later resolved to a stream offset
uint4 flags; // not interpreted in this file
sint4 numberTexts; // not interpreted in this file
sint4 slideId; // not interpreted in this file
uint4 reserved; // not interpreted in this file
};
// Names of the two streams this reader opens inside the storage.
#define CURRENT_USER_STREAM L"Current User"
#define DOCUMENT_STREAM L"PowerPoint Document"
// Expected value of PSR_CurrentUserAtom::magic. It is compared against an
// unsigned 32-bit field, where this value is 0xE391C05F.
#define HEADER_MAGIC_NUM -476987297
// Record types (RecordHeader::recType) that this reader looks for.
const int PST_UserEditAtom = 4085;
const int PST_PersistPtrIncrementalBlock = 6002; // Incremental diffs on persists
const int PST_SlidePersistAtom = 1011;
const int PST_TextCharsAtom = 4000; // Unicode in text
const int PST_TextBytesAtom = 4008; // non-unicode text
// Defined further down; FileReader only holds a pointer to it.
class PPSPersistDirectory;
struct ParseContext
{
ParseContext(ParseContext *pNext) : m_pNext(pNext), m_nCur(0)
{
}
RecordHeader m_rh;
uint4 m_nCur;
ParseContext *m_pNext;
};
// Capacity, in slide references, of a single SlideListChunk.
const int SLIDELISTCHUNKSIZE=32;

// Node of a singly linked list holding up to SLIDELISTCHUNKSIZE slide
// persist references each.
struct SlideListChunk
{
    SlideListChunk *pNext;                    // following chunk, or NULL
    DWord numInChunk;                         // number of valid entries in refs[]
    psrReference refs[SLIDELISTCHUNKSIZE];    // the references themselves

    // Creates a chunk that links to `next` and already contains `newOne`
    // as its first (and only) reference.
    SlideListChunk( SlideListChunk* next, psrReference newOne )
        : pNext( next ),
          numInChunk( 1 )
    {
        refs[0] = newOne;
    }
};
// Reads the text of a PowerPoint '97 presentation out of an IStorage.
// Typical use (see the file-level ReadText): construct, check
// IsPowerPoint(), call ReadPersistDirectory() and ReadSlideList(), then
// call ReadText() repeatedly until it returns FALSE.
class FileReader
{
public:
    // Keeps a reference to pStg and validates its "Current User" stream.
    FileReader(IStorage *pStg);
    ~FileReader();

    // Reads next size chars from file. Returns TRUE if there is more
    // text to read.
    BOOL ReadText( WCHAR *pBuff, ULONG size, ULONG *pSizeRet );

    // Returns true if this is a PowerPoint '97 file.
    BOOL IsPowerPoint()
    {
        return m_isPP;
    }

    // Builds the persist directory by following the chain of user edits.
    void ReadPersistDirectory();
    // Reads the user-edit atom stored at `offset` of the document stream.
    void PPSReadUserEditAtom( DWord offset, PSR_UserEditAtom& userEdit );
    // Collects the slide references found in the document record.
    void ReadSlideList();

protected:
    BOOL ReadCurrentUser(IStream *pStm);
    void *ReadRecord( RecordHeader& rh );
    BOOL Parse();
    IStream *GetDocStream();

    // This simple reader never delegates a record to the client.
    BOOL DoesClientRead( psrType type )
    {
        return FALSE;
    }

    void ReleaseRecord( RecordHeader& rh, void* diskRecBuf );
    DWord ParseForSlideLists();
    void AddSlideToList( psrReference refToAdd );
    BOOL StartParse( DWord offset );
    BOOL FillBufferWithText();
    BOOL FindNextSlide( DWord& offset );

private:
    PSR_CurrentUserAtom  m_currentUser;        // contents of the "Current User" stream
    IStream *            m_pDocStream;         // document stream, opened on first use
    IStorage *           m_pPowerPointStg;     // storage passed to the constructor
    BOOL                 m_isPP;               // result of the "Current User" validation
    ParseContext*        m_pParseContexts;     // stack of containers being parsed
    WCHAR*               m_pCurText;           // text of the current text atom, or NULL
    unsigned long        m_curTextPos;         // next character of m_pCurText to hand out
    unsigned long        m_curTextLength;      // number of characters in m_pCurText
    PSR_UserEditAtom*    m_pLastUserEdit;      // copy of the most recent user edit
    PPSPersistDirectory* m_pPersistDirectory;  // maps persist references to offsets
    SlideListChunk*      m_pFirstChunk;        // head of the slide reference list
    int                  m_curSlideNum;        // index of the next slide to parse
    WCHAR*               m_pClientBuf;         // caller's buffer for the current ReadText call
    unsigned long        m_clientBufSize;      // capacity of m_pClientBuf
    unsigned long        m_clientBufPos;       // characters written to m_pClientBuf so far
    ULONG*               m_pSizeRet;           // caller's running count of characters returned
};
// Takes a reference on pStg, opens its "Current User" stream and records in
// m_isPP whether that stream identifies a PowerPoint file.
// Fixes over the previous version:
//  - the stream is only released when OpenStream actually returned one
//    (the old code called Release() on a NULL pointer when the open failed);
//  - every member, including m_currentUser, m_curTextLength and m_pSizeRet,
//    is initialised, in declaration order;
//  - a NULL storage leaves the reader in the "not PowerPoint" state instead
//    of being dereferenced.
FileReader::FileReader(IStorage *pStg) :
    m_currentUser(),
    m_pDocStream( NULL ),
    m_pPowerPointStg( pStg ),
    m_isPP( FALSE ),
    m_pParseContexts( NULL ),
    m_pCurText( NULL ),
    m_curTextPos( 0 ),
    m_curTextLength( 0 ),
    m_pLastUserEdit( NULL ),
    m_pPersistDirectory( NULL ),
    m_pFirstChunk( NULL ),
    m_curSlideNum( 0 ),
    m_pClientBuf( NULL ),
    m_clientBufSize( 0 ),
    m_clientBufPos( 0 ),
    m_pSizeRet( NULL )
{
    if ( m_pPowerPointStg == NULL )
        return;
    m_pPowerPointStg->AddRef();

    IStream *pStm = NULL;
    HRESULT hr = m_pPowerPointStg->OpenStream( CURRENT_USER_STREAM, NULL, STGM_READ | STGM_DIRECT | STGM_SHARE_EXCLUSIVE, NULL, &pStm );
    if ( SUCCEEDED(hr) && pStm != NULL )
    {
        if ( ReadCurrentUser(pStm) )
            m_isPP = TRUE;
        pStm->Release();
    }
}
// Releases everything the reader acquired: the pending text buffer, the
// parse-context stack, the slide reference list, the copy of the last user
// edit, the document stream and finally the storage reference taken by the
// constructor. Previously only the storage was released.
FileReader::~FileReader()
{
    delete [] m_pCurText;
    m_pCurText = NULL;

    while ( m_pParseContexts != NULL )
    {
        ParseContext* pContext = m_pParseContexts;
        m_pParseContexts = pContext->m_pNext;
        delete pContext;
    }

    while ( m_pFirstChunk != NULL )
    {
        SlideListChunk* pChunk = m_pFirstChunk;
        m_pFirstChunk = pChunk->pNext;
        delete pChunk;
    }

    delete m_pLastUserEdit;
    m_pLastUserEdit = NULL;

    // NOTE(review): m_pPersistDirectory is still leaked. PPSPersistDirectory
    // is only forward-declared at this point of the file, and deleting a
    // pointer to an incomplete type would skip its destructor. Move this
    // destructor below the class definition to be able to delete it.

    if ( m_pDocStream != NULL )
    {
        m_pDocStream->Release();
        m_pDocStream = NULL;
    }
    if ( m_pPowerPointStg != NULL )
        m_pPowerPointStg->Release();
}
// Copies as much of the pending text (m_pCurText) as fits into the caller's
// buffer, adds the copied amount to *m_pSizeRet and frees the pending text
// once it has been handed out completely.
// Returns TRUE when the caller's buffer is full.
BOOL FileReader::FillBufferWithText()
{
    // With no pending text there is nothing to copy. Reset the counters so a
    // stale or never-initialised length cannot make the loop below read
    // through a NULL pointer.
    if ( m_pCurText == NULL )
    {
        m_curTextPos = 0;
        m_curTextLength = 0;
    }

    const unsigned long textLeft = m_curTextLength - m_curTextPos;
    const unsigned long roomLeft = m_clientBufSize - m_clientBufPos;
    const unsigned long amtToCopy = ( textLeft < roomLeft ) ? textLeft : roomLeft;

    for ( unsigned long i = 0; i < amtToCopy; ++i )
        m_pClientBuf[ m_clientBufPos++ ] = m_pCurText[ m_curTextPos++ ];

    if ( m_curTextPos == m_curTextLength )
    {
        delete [] m_pCurText;
        m_pCurText = NULL;
        m_curTextPos = 0;
        m_curTextLength = 0;
    }
    *m_pSizeRet += amtToCopy;
    return(m_clientBufSize == m_clientBufPos); // If client's buffer is full return TRUE.
}
// Appends refToAdd to the slide reference list.
// New chunks are linked at the tail of the list. The previous version
// pushed them at the head, so as soon as more than SLIDELISTCHUNKSIZE
// slides existed, FindNextSlide (which walks the list from m_pFirstChunk)
// returned the newest chunk first and the slides came out in the wrong order.
void FileReader::AddSlideToList( psrReference refToAdd )
{
    if ( m_pFirstChunk == NULL )
    {
        m_pFirstChunk = new SlideListChunk( NULL, refToAdd );
        return;
    }

    // Only the last chunk can have free room.
    SlideListChunk* pLast = m_pFirstChunk;
    while ( pLast->pNext != NULL )
        pLast = pLast->pNext;

    if ( pLast->numInChunk < (DWord)SLIDELISTCHUNKSIZE )
    {
        pLast->refs[ pLast->numInChunk ] = refToAdd;
        pLast->numInChunk++;
    }
    else
    {
        pLast->pNext = new SlideListChunk( NULL, refToAdd );
    }
}
// Returns the "PowerPoint Document" stream, opening it on first use.
// Returns NULL when the storage was not recognised as PowerPoint or when
// the stream cannot be opened (an error is printed to stderr in that case).
IStream *FileReader::GetDocStream()
{
    // Already opened by an earlier call.
    if ( m_pDocStream != NULL )
        return m_pDocStream;

    if ( !m_isPP )
        return NULL;

    const HRESULT openResult = m_pPowerPointStg->OpenStream( DOCUMENT_STREAM, NULL, STGM_READ | STGM_DIRECT | STGM_SHARE_EXCLUSIVE, NULL, &m_pDocStream );
    if ( SUCCEEDED( openResult ) )
        return m_pDocStream;

    fprintf(stderr,"Error (%d) opening PowerPoint Document Stream.\n",(int)openResult);
    return NULL;
}
// Reads the record header and the PSR_CurrentUserAtom from the
// "Current User" stream into m_currentUser and validates it.
// Returns TRUE only when both reads delivered the full number of bytes and
// the atom's size, magic number and user-name length look right.
// The previous version went on to validate m_currentUser even when the
// second read had failed, i.e. it judged data that was never read.
BOOL FileReader::ReadCurrentUser(IStream *pStm)
{
    if ( pStm == NULL )
        return FALSE;

    RecordHeader rh;
    ULONG nRd = 0;
    HRESULT hr = pStm->Read( &rh, sizeof( rh ), &nRd );
    if ( FAILED( hr ) || nRd != sizeof( rh ) )
        return FALSE;

    nRd = 0;
    hr = pStm->Read( &m_currentUser, sizeof( PSR_CurrentUserAtom ), &nRd );
    if ( FAILED( hr ) || nRd != sizeof( PSR_CurrentUserAtom ) )
        return FALSE;

    // The magic constant is a negative int literal; convert it explicitly to
    // the type of the field it is compared with.
    const BOOL isPP = ( m_currentUser.size == sizeof( m_currentUser ) )&&
                      ( m_currentUser.magic == (uint4)( HEADER_MAGIC_NUM ) )&&
                      ( m_currentUser.lenUserName <= 255 );
    return isPP;
}
// One block of the persist directory: a table of m_tableSize DWords owned
// by the entry, plus a link to the next entry. Only PPSPersistDirectory
// (a friend) can create entries and touch their members.
class PPSDirEntry
{
    PPSDirEntry()
        : m_pNext( NULL ), m_pOffsets( NULL ), m_tableSize( 0 )
    {
    }
    PPSDirEntry* m_pNext;     // next entry in the directory, or NULL
    DWord* m_pOffsets;        // table allocated with new DWord[] by AddEntry
    DWord m_tableSize;        // number of DWords in m_pOffsets
public:
    ~PPSDirEntry()
    {
        // The table is allocated with new[], so it must be freed with
        // delete[] (the previous plain delete was undefined behaviour).
        delete [] m_pOffsets; m_pOffsets = NULL;
    }
    friend class PPSPersistDirectory;
};
// class PPSDirEntry
// Linked list of PPSDirEntry tables that maps persist references to
// offsets in the document stream.
class PPSPersistDirectory
{
public:
    PPSPersistDirectory();
    ~PPSPersistDirectory();

    // Copies a table of cOffsets DWords into a new entry at the end of the list.
    void AddEntry( DWord cOffsets, DWord* pOffsets );
    // Looks up the stream offset recorded for `ref`; (DWord)-1 if unknown.
    DWord GetPersistObjStreamPos( DWord ref );
    // Sums the reference counts stored in the directory tables.
    DWord NumberOfAlreadySavedPersists();

private:
    PPSDirEntry* m_pFirstDirEntry;   // head of the entry list, or NULL
};
// Creates an empty directory (no entries yet).
PPSPersistDirectory::PPSPersistDirectory()
    : m_pFirstDirEntry( NULL )
{
}
// Deletes every entry of the list, front to back.
PPSPersistDirectory::~PPSPersistDirectory()
{
    PPSDirEntry* pDoomed = m_pFirstDirEntry;
    while ( pDoomed != NULL )
    {
        // Remember the successor before the current entry goes away.
        PPSDirEntry* pFollowing = pDoomed->m_pNext;
        delete pDoomed;
        pDoomed = pFollowing;
    }
    m_pFirstDirEntry = NULL;
}
// Appends a new entry holding a private copy of the cOffsets DWords at
// pOffsets. Entries keep the order in which they were added, which is the
// order GetPersistObjStreamPos searches them in.
// A NULL or empty table is ignored; previously a NULL pOffsets (e.g. when
// the caller's record read produced no buffer) was passed straight to memcpy.
void PPSPersistDirectory::AddEntry( DWord cOffsets, DWord* pOffsets )
{
    if ( pOffsets == NULL || cOffsets == 0 )
        return;

    PPSDirEntry* pDirEntry = new PPSDirEntry();
    pDirEntry->m_pOffsets = new DWord[cOffsets];
    pDirEntry->m_tableSize = cOffsets;
    memcpy( pDirEntry->m_pOffsets, pOffsets, cOffsets * sizeof( DWord ) );

    // append to the end of the entry list
    PPSDirEntry** ppDirEntry = &m_pFirstDirEntry;
    while ( NULL != *ppDirEntry )
        ppDirEntry = &(*ppDirEntry)->m_pNext;
    *ppDirEntry = pDirEntry;
}
// Returns the stream offset recorded for persist reference `ref`, or
// (DWord)-1 when no entry covers it. Entries are searched in list order and
// the first match wins.
// Each table is a sequence of runs: one DWord whose upper 12 bits give the
// number of references in the run and whose lower 20 bits give the first
// reference, followed by that many offsets.
// Unlike the previous version, a run that claims more offsets than the
// table still holds is treated as the end of that table instead of being
// read out of bounds.
DWord PPSPersistDirectory::GetPersistObjStreamPos( DWord ref )
{
    for ( const PPSDirEntry* pEntry = m_pFirstDirEntry; pEntry != NULL; pEntry = pEntry->m_pNext )
    {
        const DWord* table = pEntry->m_pOffsets;
        const DWord tableSize = pEntry->m_tableSize;
        DWord i = 0;
        while ( i < tableSize )
        {
            const DWord nRefs = table[i] >> 20;
            const DWord base = table[i] & 0xFFFFF; // 1-based
            if ( nRefs > tableSize - i - 1 )
                break; // truncated run: the rest of this table is unusable
            if ( ( base <= ref )&&( ref - base < nRefs ) )
                return table[ i + 1 + ( ref - base ) ];
            i += nRefs + 1;
        }
    }
    return(DWord) -1;
}
// Returns the total number of references recorded in all runs of all
// directory entries (the sum of the 12-bit counts in the run headers).
// The previous loop condition subtracted the pointers the wrong way round
// and compared an element count with a byte count, so it stopped after the
// first run of each entry. The table is now walked by index, with the same
// truncated-run guard as the lookup.
DWord PPSPersistDirectory::NumberOfAlreadySavedPersists()
{
    DWord count = 0;
    for ( const PPSDirEntry* pEntry = m_pFirstDirEntry; pEntry != NULL; pEntry = pEntry->m_pNext )
    {
        const DWord* table = pEntry->m_pOffsets;
        const DWord tableSize = pEntry->m_tableSize;
        DWord i = 0;
        while ( i < tableSize )
        {
            const DWord nRefs = table[i] >> 20;
            if ( nRefs > tableSize - i - 1 )
                break; // truncated run: do not count offsets that are not there
            count += nRefs;
            i += nRefs + 1;
        }
    }
    return count;
}
// Reads the user-edit atom stored at `offset` of the document stream into
// userEdit.
// userEdit is zeroed first and stays all-zero when the stream is
// unavailable or a seek/read fails or comes up short, so a caller that
// follows offsetLastEdit stops instead of chasing unread data.
void FileReader::PPSReadUserEditAtom( DWord offset, PSR_UserEditAtom& userEdit )
{
    userEdit = PSR_UserEditAtom();

    IStream* pStm = GetDocStream();
    if ( pStm == NULL )
        return;

    LARGE_INTEGER li;
    li.LowPart = offset;
    li.HighPart = 0;
    if ( FAILED( pStm->Seek(li,STREAM_SEEK_SET, NULL) ) )
        return;

    RecordHeader rh;
    ULONG nRd = 0;
    if ( FAILED( pStm->Read(&rh, sizeof(rh), &nRd) ) || nRd != sizeof(rh) )
        return;
    Assert( rh.recType == PST_UserEditAtom );
    Assert( rh.recLen == sizeof( PSR_UserEditAtom ) );

    nRd = 0;
    if ( FAILED( pStm->Read(&userEdit, sizeof(userEdit), &nRd) ) || nRd != sizeof(userEdit) )
        userEdit = PSR_UserEditAtom(); // discard a partial read
}
// Reads the record header at the current position of the document stream
// into rh and, for an atom, the record body into a freshly allocated buffer.
// Return values:
// NULL and rh marks a container: no record was read in.
// record header indicated start of container.
// NULL and rh does not mark a container: client must read in record, or
// the header could not be read (rh is then all zero).
// Otherwise: buffer of rh.recLen bytes; release it with ReleaseRecord().
//
// Fixes over the previous version:
//  - recVer is a 4-bit field, so comparing it with PSFLAG_CONTAINER (0xFF)
//    could never match; the low nibble is compared instead, as the parsers
//    in this file do;
//  - the header read is checked;
//  - the buffer is zero-filled, so a short body read leaves zeros rather
//    than indeterminate bytes.
void *FileReader::ReadRecord( RecordHeader& rh )
{
    rh = RecordHeader();
    IStream *pStm = GetDocStream();
    if ( pStm == NULL )
        return NULL;

    // read record header, verify
    ULONG nRd = 0;
    if ( FAILED( pStm->Read(&rh, sizeof(rh), &nRd) ) || nRd != sizeof(rh) )
    {
        rh = RecordHeader();
        return NULL;
    }
    // if client will read, do not read in record
    if ( DoesClientRead( rh.recType ) )
        return NULL;
    // If container, return NULL
    if ( ( rh.recVer & 0x0F ) == ( PSFLAG_CONTAINER & 0x0F ) )
        return NULL;
    // Allocate buffer for disk record. Client must call ReleaseRecord() or
    // pass the atom up to CObject::ConstructContents() which will
    // then release it.
    char* buffer = new char[rh.recLen]();
    // read in record
    pStm->Read(buffer, rh.recLen, NULL);
    // NOTE: ByteSwapping & versioning not done by this simple reader.
    return(buffer);
}
// Frees a buffer obtained from ReadRecord() and marks the header as
// consumed by clearing its type, so that the record doesn't get processed
// again. Nothing is freed when the header was already consumed
// (recType == 0) or when the version field equals PSFLAG_CONTAINER.
// NOTE(review): recVer is declared as a 4-bit field, so the comparison with
// PSFLAG_CONTAINER (0xFF) looks like it can never be equal -- confirm.
void FileReader::ReleaseRecord( RecordHeader& rh, void* diskRecBuf )
{
    const bool alreadyConsumed = ( rh.recType == 0 );
    const bool flaggedContainer = ( rh.recVer == PSFLAG_CONTAINER );

    if ( !alreadyConsumed && !flaggedContainer )
    {
        char* rawBytes = (char*)diskRecBuf;
        delete [] rawBytes;
    }

    rh.recType = 0;
}
void FileReader::ReadPersistDirectory()
{
if ( NULL != m_pLastUserEdit )
return; // already read
PSR_UserEditAtom userEdit;
DWord offsetToEdit = m_currentUser.offsetToCurrentEdit;
while ( 0 < offsetToEdit )
{
PPSReadUserEditAtom( offsetToEdit, userEdit );
if ( NULL == m_pLastUserEdit )
{
m_pPersistDirectory = new PPSPersistDirectory();
m_pLastUserEdit = new PSR_UserEditAtom;
*m_pLastUserEdit = userEdit;
}
LARGE_INTEGER li;
li.LowPart = userEdit.offsetPersistDirectory;
li.HighPart = 0;
GetDocStream()->Seek(li,STREAM_SEEK_SET, NULL); // AR: check that seek succeeded.
RecordHeader rh;
DWord *pDiskRecord = (DWord*) ReadRecord(rh);
Assert( PST_PersistPtrIncrementalBlock == rh.recType );
m_pPersistDirectory->AddEntry( rh.recLen / sizeof( DWord ), pDiskRecord );
ReleaseRecord( rh, pDiskRecord );
offsetToEdit = userEdit.offsetLastEdit;
}
}
// PPStorage::ReadPersistDirectory
void FileReader::ReadSlideList()
{
Assert( m_pLastUserEdit != NULL );
DWord offsetToDoc = m_pPersistDirectory->GetPersistObjStreamPos( m_pLastUserEdit->documentRef );
LARGE_INTEGER li;
li.LowPart = offsetToDoc;
li.HighPart = 0;
GetDocStream()->Seek(li,STREAM_SEEK_SET, NULL);
ParseForSlideLists();
}
// Recursively parses the record at the current position of the document
// stream and adds every PST_SlidePersistAtom found to the slide list.
// Returns the number of bytes consumed (header plus contents), or 0 when
// the record could not be read; a successfully parsed record always
// consumes at least sizeof( RecordHeader ) bytes, so 0 is unambiguous.
//
// Fixes over the previous version:
//  - the byte count of a slide persist atom was overwritten by the second
//    Read, so the atom was reported as 2 * recLen bytes instead of
//    header + recLen and the enclosing container ended early;
//  - read/seek failures end the enclosing loops instead of spinning on
//    stale header data;
//  - a slide persist atom whose length differs from the structure size no
//    longer leaves the stream at the wrong position.
DWord FileReader::ParseForSlideLists()
{
    IStream *pStm = GetDocStream();
    if ( pStm == NULL )
        return 0;

    RecordHeader rh;
    ULONG nRd = 0;
    if ( FAILED( pStm->Read(&rh, sizeof(rh), &nRd) ) || nRd != sizeof(rh) )
        return 0;

    DWord consumed = sizeof( rh );
    // recVer is 4 bits wide; all bits set marks a container.
    const bool isContainer = ( ( rh.recVer & 0x0F ) == 0x0F );
    if ( !isContainer )
    {
        DWord toSkip = rh.recLen; // part of the body not read below
        if ( rh.recType == PST_SlidePersistAtom && rh.recLen >= sizeof( PSR_SlidePersistAtom ) )
        {
            PSR_SlidePersistAtom spa;
            Assert( sizeof(spa) == rh.recLen );
            nRd = 0;
            if ( FAILED( pStm->Read(&spa, sizeof(spa), &nRd) ) || nRd != sizeof(spa) )
                return 0;
            AddSlideToList( spa.psrReference );
            toSkip = rh.recLen - (DWord)sizeof(spa);
        }
        if ( toSkip != 0 )
        {
            LARGE_INTEGER li;
            li.LowPart = toSkip;
            li.HighPart = 0;
            if ( FAILED( pStm->Seek(li,STREAM_SEEK_CUR, NULL) ) )
                return 0;
        }
        consumed += rh.recLen;
    }
    else
    {
        // Stack based parsing for SlideLists
        DWord nCur = 0;
        while ( nCur < rh.recLen )
        {
            const DWord child = ParseForSlideLists();
            if ( child == 0 )
                break; // unreadable child: give up on this container
            nCur += child;
        }
        consumed += nCur;
    }
    return consumed;
}
// Fills pBuff with up to `size` characters of presentation text and stores
// the number of characters written in *pSizeRet.
// Returns TRUE as soon as the buffer is full (more text may follow) and
// FALSE once there are no more slides to parse.
BOOL FileReader::ReadText( WCHAR *pBuff, ULONG size, ULONG *pSizeRet )
{
    // Remember the caller's buffer for FillBufferWithText().
    *pSizeRet = 0;
    m_pSizeRet = pSizeRet;
    m_pClientBuf = pBuff;
    m_clientBufSize = size;
    m_clientBufPos = 0;

    DWord slideOffset;
    while ( true )
    {
        if ( m_pParseContexts != NULL )
        {
            // A parse is in progress: use existing text first ...
            if ( m_pClientBuf && FillBufferWithText() )
                return TRUE;
            // ... then restart the parse where we left off.
            if ( Parse() )
                return TRUE;
            continue;
        }

        // No parse in progress: move on to the next slide.
        if ( !FindNextSlide( slideOffset ) )
            return FALSE; // DONE parsing, no more slides
        if ( StartParse( slideOffset ) )
            return TRUE;
    }
}
// Starts parsing the record stored at `offset` of the document stream: its
// header becomes the outermost parse context and Parse() is run.
// Returns what Parse() returns (TRUE when the caller's buffer is full).
// Returns FALSE without creating a context when the stream is unavailable,
// the seek or header read fails, or the record is empty; previously such a
// failure left an uninitialised header in the context.
BOOL FileReader::StartParse( DWord offset )
{
    IStream* pStm = GetDocStream();
    if ( pStm == NULL )
        return FALSE;

    LARGE_INTEGER li;
    li.LowPart = offset;
    li.HighPart = 0;
    if ( FAILED( pStm->Seek(li,STREAM_SEEK_SET, NULL) ) )
        return FALSE;

    RecordHeader rh;
    ULONG nRd = 0;
    if ( FAILED( pStm->Read(&rh, sizeof(RecordHeader), &nRd) ) || nRd != sizeof(RecordHeader) )
        return FALSE;
    if ( rh.recLen == 0 )
        return FALSE; // nothing inside this record to parse

    m_pParseContexts = new ParseContext( NULL );
    m_pParseContexts->m_rh = rh;
    return Parse();
}
BOOL FileReader::Parse()
{
IStream *pStm = GetDocStream();
RecordHeader rh;
DWord nRd=0;
Assert( m_pParseContexts );
// Restarting a parse might complete a container so we test this initially.
if ( m_pParseContexts->m_nCur >= m_pParseContexts->m_rh.recLen )
{
Assert( m_pParseContexts->m_nCur == m_pParseContexts->m_rh.recLen );
ParseContext* pParseContext = m_pParseContexts;
m_pParseContexts = m_pParseContexts->m_pNext;
delete pParseContext;
}
do
{
pStm->Read(&rh, sizeof(RecordHeader), NULL);
if ( ( rh.recVer != PSFLAG_CONTAINER ) && ( (rh.recVer & 0x0F)!=0x0F ) )
{
if ( rh.recType == PST_TextCharsAtom )
{
m_curTextPos = 0;
m_curTextLength = rh.recLen/2;
Assert( m_pCurText == NULL );
m_pCurText = new WCHAR[rh.recLen/2];
pStm->Read(m_pCurText, rh.recLen, &nRd);
wprintf( L"-%s-\n", m_pCurText );
if ( FillBufferWithText() )
return TRUE; // Stop parsing if buffer is full, and return control to client
}
else if ( rh.recType == PST_TextBytesAtom )
{
Assert( m_pCurText == NULL );
m_curTextPos = 0;
m_curTextLength = rh.recLen;
m_pCurText = new WCHAR[rh.recLen];
pStm->Read(m_pCurText, rh.recLen, &nRd);
char *pHack = (char *) m_pCurText;
unsigned int back2 = rh.recLen*2-1;
unsigned int back1 = rh.recLen-1;
for (unsigned int i=0;i<rh.recLen;i++)
{
pHack[back2-1] = pHack[back1];
pHack[back2] = 0;
back2 -=2;
back1--;
}
if ( FillBufferWithText() )
return TRUE; // Stop parsing if buffer is full, and return control to client
}
else
{
LARGE_INTEGER li;
ULARGE_INTEGER ul;
li.LowPart = rh.recLen;
li.HighPart = 0;
pStm->Seek(li,STREAM_SEEK_CUR,&ul);
}
m_pParseContexts->m_nCur += rh.recLen;
m_pParseContexts->m_nCur += sizeof( RecordHeader ); // Atom rh's add towards containing container's size.
}
else
{
m_pParseContexts = new ParseContext( m_pParseContexts );
m_pParseContexts->m_rh = rh;
}
if ( m_pParseContexts->m_nCur >= m_pParseContexts->m_rh.recLen )
{
Assert( m_pParseContexts->m_nCur == m_pParseContexts->m_rh.recLen );
ParseContext* pParseContext = m_pParseContexts;
m_pParseContexts = m_pParseContexts->m_pNext;
delete pParseContext;
}
} while ( m_pParseContexts && ( m_pParseContexts->m_nCur < m_pParseContexts->m_rh.recLen ) );
return FALSE;
}
// Advances to the next record to parse and stores its stream offset in
// `offset`. The first call yields the document record; each later call
// yields the next slide of the slide list, in list order.
// Returns FALSE when the slide list is exhausted or when no persist
// directory is available.
// References that the persist directory cannot resolve are now skipped;
// before, (DWord)-1 was handed to the parser as if it were an offset, and a
// missing user edit was dereferenced.
BOOL FileReader::FindNextSlide( DWord& offset )
{
    Assert( m_pLastUserEdit != NULL );
    if ( m_pLastUserEdit == NULL || m_pPersistDirectory == NULL )
        return FALSE;

    for ( ;; )
    {
        uint4 curSlideNum = m_curSlideNum++;
        psrReference ref;
        if ( curSlideNum == 0 )
        {
            ref = m_pLastUserEdit->documentRef;
        }
        else
        {
            // curSlideNum is 1-based from here on; find the chunk holding it.
            SlideListChunk *pCur = m_pFirstChunk;
            while ( pCur && ( curSlideNum > pCur->numInChunk ) )
            {
                curSlideNum -= pCur->numInChunk;
                pCur = pCur->pNext;
            }
            if ( pCur == NULL )
                return FALSE;
            ref = pCur->refs[curSlideNum-1];
        }

        offset = m_pPersistDirectory->GetPersistObjStreamPos( ref );
        if ( offset != (DWord) -1 )
            return TRUE;
        // Unresolvable reference: try the next slide.
    }
}
// Reads the next piece of text from the presentation in pStgFrom.
// ppContext  - in/out reader state; *ppContext must be NULL on the first
//              call and is reset to NULL when the reader has been destroyed.
// buffer     - receives up to bufferSize characters.
// pSizeRet   - receives the number of characters written.
// Returns TRUE if more text exists; FALSE when the text is exhausted or the
// storage is not a PowerPoint file.
// *ppContext is now only set once the reader is known to be usable; the
// previous version left it pointing at a deleted reader when the storage
// was not a PowerPoint file.
static BOOL ReadText( void** ppContext, IStorage* pStgFrom, WCHAR* buffer, unsigned long bufferSize, unsigned long* pSizeRet )
{
    FileReader* pFI = (FileReader *)*ppContext;
    if ( pFI == NULL )
    {
        pFI = new FileReader( pStgFrom );
        if ( !pFI->IsPowerPoint() )
        {
            delete pFI;
            *pSizeRet = 0;
            return FALSE;
        }
        pFI->ReadPersistDirectory();
        pFI->ReadSlideList();
        *ppContext = pFI;
    }

    BOOL bRet = pFI->ReadText(buffer, bufferSize, pSizeRet);
    if ( !bRet )
    {
        delete pFI;
        *ppContext = NULL;
    }
    return bRet;
}
// Entry point: opens the compound file named by argv[1] and prints its text
// to stdout in pieces of up to five characters, each between dashes.
// Returns 0 after printing the usage message or after reading the file,
// and 1 when the file name cannot be converted or the file cannot be opened.
// Changes: standard `int main`; the file-name conversion is checked (it
// fails for names that do not fit the buffer, leaving the buffer unusable);
// the storage is released before exit.
int main(int argc, char **argv)
{
    if (argc < 2)
    {
        fprintf(stderr,"Usage dblock <file to be read>\n");
        return 0;
    }

    OLECHAR wc[256];
    if ( MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, argv[1], -1, wc, 256 ) == 0 )
    {
        fprintf(stderr,"Error converting file name: %s\n",argv[1]);
        return 1;
    }

    IStorage *pStgFrom = NULL;
    HRESULT hr = StgOpenStorage(wc, NULL, STGM_READ | STGM_DIRECT |
                                STGM_SHARE_DENY_WRITE, NULL, 0, &pStgFrom);
    if (FAILED(hr))
    {
        fprintf(stderr,"Error (%d) opening docfile: %s\n",(int)hr,argv[1]);
        return 1;
    }

    WCHAR wcBuf[6];      // five characters plus the terminator added below
    ULONG sizeUsed = 0;
    BOOL fContinue = TRUE;
    void *pContext = NULL;
    while ( fContinue )
    {
        fContinue = ReadText( &pContext, pStgFrom, wcBuf, 5, &sizeUsed );
        wcBuf[sizeUsed] = 0;
        wprintf(L"-%s-\n", wcBuf);
    }

    if ( pStgFrom != NULL )
        pStgFrom->Release();
    return 0;
}