Plan 9 from Bell Labs’s /usr/web/sources/plan9/sys/src/cmd/gs/lib/pdf_base.ps

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


%    Copyright (C) 1994-2003 artofcode LLC.  All rights reserved.
% 
% This software is provided AS-IS with no warranty, either express or
% implied.
% 
% This software is distributed under license and may not be copied,
% modified or distributed except as expressly authorized under the terms
% of the license contained in the file LICENSE in this distribution.
% 
% For more information about licensing, please refer to
% http://www.ghostscript.com/licensing/. For information on
% commercial licensing, go to http://www.artifex.com/licensing/ or
% contact Artifex Software, Inc., 101 Lucas Valley Road #110,
% San Rafael, CA  94903, U.S.A., +1(415)492-9861.

% $Id: pdf_base.ps,v 1.48 2005/09/16 19:01:30 ray Exp $
% pdf_base.ps
% Basic parser for PDF reader.

% This handles basic parsing of the file (including the trailer
% and cross-reference table), as well as objects, object references,
% streams, and name/number trees; it doesn't include any facilities for
% making marks on the page.

/.setlanguagelevel where { pop 2 .setlanguagelevel } if
.currentglobal true .setglobal
/pdfdict where { pop } { /pdfdict 100 dict def } ifelse
pdfdict begin

% Define the name interpretation dictionary for reading values.
/valueopdict mark
  (<<) cvn { mark } bind	% don't push an actual mark!
  (>>) cvn { { .dicttomark } stopped {
      (   **** File has an unbalanced >> \(close dictionary\).\n)
      pdfformaterror
    } if
  } bind
  ([) cvn { mark } bind		% ditto
  (]) cvn dup load
%  /true true		% see .pdfexectoken below
%  /false false		% ibid.
%  /null null		% ibid.
  /F dup cvx		% see Objects section below
  /R dup cvx		% see Objects section below
  /stream dup cvx	% see Streams section below
.dicttomark readonly def

% ------ Utilities ------ %

% Define a scratch string.  The PDF language definition says that
% no line in a PDF file can exceed 255 characters.
/pdfstring 255 string def

% Read the previous line of a file.  If we aren't at a line boundary,
% read the line containing the current position.
% Skip any blank lines.
/prevline		% - prevline <startpos> <substring>
 { PDFfile fileposition dup () pdfstring
   2 index 257 sub 0 .max PDFfile exch setfileposition
    {		% Stack: initpos linepos line string
      PDFfile fileposition
      PDFfile 2 index readline pop
      dup length 0 gt
       { 3 2 roll 5 -2 roll pop pop 2 index }
       { pop }
      ifelse
		% Stack: initpos linepos line string startpos
      PDFfile fileposition 5 index ge { exit } if
      pop
    }
   loop pop pop 3 -1 roll pop
 } bind def

% Handle the PDF 1.2 #nn escape convention when reading from a file.
% This should eventually be done in C.
/.pdffixname {			% <execname> .pdffixname <execname'>
  PDFversion 1.2 ge {
    dup .namestring (#) search {
      name#escape cvn exch pop
    } {
      pop
    } ifelse
  } if
} bind def
/name#escape			% <post> <(#)> <pre> name#escape <string>
{ exch pop
  1 index 2 () /SubFileDecode filter dup (x) readhexstring
		% Stack: post pre stream char t/f
  not {	% tolerate, but complain about bad syntax
    pop closefile (#) concatstrings exch
    (   **** Warning: Invalid hex following '#' name escape, using literal '#' in name.\n)
    pdfformaterror
  } {
    exch closefile concatstrings
    exch 2 1 index length 2 sub getinterval
  } ifelse
  (#) search { name#escape } if concatstrings
} bind def

% Execute a file, interpreting its executable names in a given
% dictionary.  The name procedures may do whatever they want
% to the operand stack.
/.pdftokenerror {		% <count> <opdict> <errtoken> .pdftokenerror -
  BXlevel 0 le {
    (   **** Unknown operator: ') pdfformaterror
    dup =string cvs pdfformaterror 
    % Attempt a retry scan of the element after changing to PDFScanInvNum
    << /PDFScanInvNum true >> setuserparams
    =string cvs
    token pop exch pop dup type
    dup /integertype eq exch /realtype eq or {
      exch pop exch pop
      (', processed as number, value: ) pdfformaterror
      dup =string cvs pdfformaterror (\n) pdfformaterror
     << /PDFScanInvNum null >> setuserparams    % reset to default scanning rules
      false 	% suppress any stack cleanup
    } {
      % error was non-recoverable with modified scanning rules
    ('\n) pdfformaterror
      true
    } ifelse
  } {
    true
  } ifelse
  { % clean up the operand stack if this was non-recoverable
  pop pop count exch sub { pop } repeat	% pop all the operands
  } if
} bind def
/.pdfexectoken {		% <count> <opdict> <exectoken> .pdfexectoken ?
  PDFDEBUG {
    pdfdict /PDFSTEPcount known not { pdfdict /PDFSTEPcount 1 .forceput } if
    PDFSTEP {
      pdfdict /PDFtokencount 2 copy .knownget { 1 add } { 1 } ifelse .forceput
      PDFSTEPcount 1 gt {
	pdfdict /PDFSTEPcount PDFSTEPcount 1 sub .forceput
      } {
        dup ==only
        (    step # ) print PDFtokencount =only 
	( ? ) print flush 1 false .outputpage
	(%stdin) (r) file 255 string readline {
	  token {
            exch pop pdfdict /PDFSTEPcount 3 -1 roll .forceput
	  } {
	    pdfdict /PDFSTEPcount 1 .forceput
	  } ifelse % token
	} {
	  pop /PDFSTEP false def	 % EOF on stdin
	} ifelse % readline
      } ifelse % PDFSTEPcount > 1
    } {
      dup ==only () = flush
    } ifelse % PDFSTEP
  } if % PDFDEBUG
  2 copy .knownget {
    exch pop exch pop exch pop exec
  } {
		% Normally, true, false, and null would appear in opdict
		% and be treated as "operators".  However, there is a
		% special fast case in the PostScript interpreter for names
		% that are defined in, and only in, systemdict and/or
		% userdict: putting these three names in the PDF dictionaries
		% destroys this property for them, slowing down their
		% interpretation in all PostScript code.  Therefore, we
		% check for them explicitly here instead.
    dup dup dup /true eq exch /false eq or exch /null eq or {
      exch pop exch pop //systemdict exch get
    } {
      .pdftokenerror
    } ifelse
  } ifelse
} bind def
/.pdfrun {			% <file> <opdict> .pdfrun -
	% Construct a procedure with the stack depth, file and opdict
	% bound into it.
  1 index cvlit count 2 sub 3 1 roll mark mark 5 2 roll
  {	% Stack: ..operands.. count opdict file
    token {
      dup type /nametype eq {
	dup xcheck {
	  .pdfexectoken
	} {
	  .pdffixname
	  exch pop exch pop PDFDEBUG {
            PDFSTEPcount 1 le {
              dup ==only ( ) print flush
            } if
          } if
	} ifelse
      } {
	exch pop exch pop PDFDEBUG {
          PDFSTEPcount 1 le {
            dup ==only ( ) print flush
          } if
        } if
      } ifelse
    } {
      (%%EOF) cvn cvx .pdfexectoken
    } ifelse
  }
  aload pop .packtomark cvx
  /loop cvx 2 packedarray cvx
  { stopped /PDFsource } aload pop
  PDFsource
  { store { stop } if } aload pop .packtomark cvx
  /PDFsource 3 -1 roll store exec
} bind def

% Execute a file, like .pdfrun, for a marking context.
% This temporarily rebinds LocalResources and DefaultQstate.
/.pdfruncontext {		% <resdict> <file> <opdict> .pdfruncontext -
  /.pdfrun load LocalResources DefaultQstate
  /LocalResources 7 -1 roll store
  /DefaultQstate qstate store
  3 .execn
  /DefaultQstate exch store
  /LocalResources exch store
} bind def

% Get the depth of the PDF operand stack.  The caller sets pdfemptycount
% before calling .pdfrun or .pdfruncontext.  It is initially set by
% pdf_main, and is also set by any routine which changes the operand
% stack depth (currently .pdfpaintproc, although there are other callers
% of .pdfrun{context} which have not been checked for opstack depth.
/.pdfcount {		% - .pdfcount <count>
  count pdfemptycount sub
} bind def

% ================================ Objects ================================ %

% Since we may have more than 64K objects, we have to use a 2-D array to
% hold them (and the parallel Generations structure).
/lshift 9 def
/lnshift lshift neg def
/lsubmask 1 lshift bitshift 1 sub def
/lsublen lsubmask 1 add def
/larray {	% - larray <larray>
  [ [] ]
} bind def
/lstring {	% - lstring <lstring>
  [ () ]
} bind def
/ltype {	% <lseq> type <type>
  0 get type
} bind def
/lget {		% <lseq> <index> lget <value>
  dup //lsubmask and 3 1 roll //lnshift bitshift get exch get
} bind def
/lput {		% <lseq> <index> <value> lput -
  3 1 roll
  dup //lsubmask and 4 1 roll //lnshift bitshift get
  3 1 roll put
} bind def
/llength {	% <lseq> llength <length>
  dup length 1 sub dup //lshift bitshift
  3 1 roll get length add
} bind def
% lgrowto assumes newlength > llength(lseq)
/growto {	% <string/array> <length> growto <string'/array'>
  1 index type /stringtype eq { string } { array } ifelse
  2 copy copy pop exch pop
} bind def
/lgrowto {	% <lseq> <newlength> lgrowto <lseq'>
    dup //lsubmask add //lnshift bitshift dup 3 index length gt {
	% Add more sub-arrays.  Start by completing the last existing one.
		% Stack: lseq newlen newtoplen
    3 -1 roll dup llength 1 sub //lsubmask or 1 add lgrowto
		% Stack: newlen newtoplen lseq
    [ exch aload pop
    counttomark 2 add -1 roll		% newtoplen
    counttomark sub { dup 0 0 getinterval lsublen growto } repeat
    dup 0 0 getinterval ] exch
  } {
    pop
  } ifelse
	% Expand the last sub-array.
  1 sub //lsubmask and 1 add
  exch dup dup length 1 sub 2 copy
		% Stack: newsublen lseq lseq len-1 lseq len-1
  get 5 -1 roll growto put
} bind def
/lforall {	% <lseq> <proc> lforall -
  /forall cvx 2 packedarray cvx forall
} bind def

% We keep track of PDF objects using the following PostScript variables:
%
%	Generations (lstring): Generations[N] holds 1+ the current
%	    generation number for object number N.  (As far as we can tell,
%	    this is needed only for error checking.)  For free objects,
%	    Generations[N] is 0.
%
%	Objects (larray): If object N is loaded, Objects[N] is the actual
%	    object; otherwise, Objects[N] is an executable integer giving
%	    the file offset of the object's location in the file.  If
%	    ObjectStream[N] is non-zero then Objects[N] contains the index
%	    into the object stream instead of the file offset of the object.
%
%	ObjectStream (larray): If object N is in an object stream then
%	    ObjectStream[N] holds the object number of the object stream.
%	    Otherwise ObjectStream[N] contains 0.  If ObjectStream[N]
%	    is non-zero then Objects[N] contains  the index into the object
%	    stream.
%
%	GlobalObjects (dictionary): If object N has been resolved in
%	    global VM, GlobalObjects[N] is the same as Objects[N]
%	    (except that GlobalObjects itself is stored in global VM,
%	    so the entry will not be deleted at the end of the page).
%
%	IsGlobal (lstring): IsGlobal[N] = 1 iff object N was resolved in
%	    global VM.  This is an accelerator to avoid having to do a
%	    dictionary lookup in GlobalObjects when resolving every object.

% Initialize the PDF object tables.
/initPDFobjects {		% - initPDFobjects -
  /ObjectStream larray def
  /Objects larray def
  /Generations lstring def
  .currentglobal true .setglobal
  /GlobalObjects 20 dict def
  .setglobal
  /IsGlobal lstring def
} bind def

% Grow the tables to a specified size.
/growPDFobjects {		% <minsize> growPDFobjects -
  dup ObjectStream llength gt {
    dup ObjectStream exch lgrowto /ObjectStream exch def
  } if
  dup Objects llength gt {
    dup Objects exch lgrowto /Objects exch def
  } if
  dup Generations llength gt {
    dup Generations exch lgrowto /Generations exch def
  } if
  dup IsGlobal llength gt {
    dup IsGlobal exch lgrowto /IsGlobal exch def
  } if
  pop
} bind def

% We represent an unresolved object reference by a procedure of the form
% {obj# gen# resolveR}.  This is not a possible PDF object, because PDF has
% no way to represent procedures.  Since PDF in fact has no way to represent
% any PostScript object that doesn't evaluate to itself, we can 'force'
% a possibly indirect object painlessly with 'exec'.
% Note that since we represent streams by executable dictionaries
% (see below), we need both an xcheck and a type check to determine
% whether an object has been resolved.
/resolved? {		% <object#> resolved? <value> true
			% <object#> resolved? false
  Objects 1 index lget dup xcheck {	% Check if executable
    dup type /integertype eq {		% Check if an integer
		% Check whether the object is in GlobalObjects.
      pop IsGlobal 1 index lget 0 eq {	% 0 --> Not in GlabalObjects
	pop false			% The object is not resolved
      } {				% The object is in GlobalObjects
		% Update Objects from GlobalObjects
	PDFDEBUG { (%Global=>local: ) print dup == } if
	GlobalObjects 1 index get dup Objects 4 1 roll lput true
      } ifelse
    } {				% Else object is executable but not integer
      exch pop true		% Therefore must be executable dict. (stream)
    } ifelse
  } {				% Else object is not executable.
    exch pop true		% Therefore it must have been resolved.
  } ifelse
} bind def
/oforce /exec load def
/oget {		% <array> <index> oget <object>
		% <dict> <key> oget <object>
		% Before release 6.20, this procedure stored the resolved
		% object back into the referring slot.  In order to support
		% PDF linearization, we no longer do this.
  get oforce
} bind def
/oforce_array { % <array> oforce_array <array>
  [ exch { oforce } forall ]
} bind def
/oforce_elems { % <array> oforce_elems <first> ... <last>
  { oforce } forall
} bind def
% A null value in a dictionary is equivalent to an omitted key;
% we must check for this specially.
/knownoget {	% <dict> <key> knownoget <value> true
		% <dict> <key> knownoget false
		% See oget above regarding this procedure.
  .knownget {
    oforce dup null eq { pop false } { true } ifelse
  } {
    false
  } ifelse
} bind def

% PDF 1.1 defines a 'foreign file reference', but not its meaning.
% Per the specification, we convert these to nulls.
/F {		% <file#> <object#> <generation#> F <object>
		% Some PDF 1.1 files use F as a synonym for f!
   .pdfcount 3 lt { f } { pop pop pop null } ifelse
} bind def

% Verify the generation number for a specified object
% Note:  The values in Generations is the generation number plus 1.
% If the value in Generations is zero then the object is free.
/checkgeneration {  % <object#> <generation#> checkgeneration <object#> <OK>
  Generations 2 index lget 1 sub 1 index eq {	% If generation # match ...
    pop true					% Then return true
  } {					% Else not a match ...
    QUIET not {				% Create warning message if not QUIET
      Generations 2 index lget 0 eq {	% Check if object is free ...
	(   **** Warning: reference to free object: )
      } {
	(   **** Warning: wrong generation: )
      } ifelse
      2 index =string cvs concatstrings ( ) concatstrings	% put obj #
      exch =string cvs concatstrings ( R\n) concatstrings	% put gen #
      pdfformaterror			% Output warning message
    } {					% Else QUIET ...
      pop				% Pop generation umber
    } ifelse false			% Return false if gen # not match
  } ifelse
} bind def
/R {		% <object#> <generation#> R <object>
  /resolveR cvx 3 packedarray cvx
} bind def

% If we encounter an object definition while reading sequentially,
% we just store it away and keep going.
/objopdict mark
  valueopdict { } forall
  /endobj dup cvx
.dicttomark readonly def

/obj {			% <object#> <generation#> obj <object>
  PDFfile objopdict .pdfrun
} bind def

/endobj {		% <object#> <generation#> <object> endobj <object>
  3 1 roll
		% Read the xref entry if we haven't yet done so.
		% This is only needed for generation # checking.
  1 index resolved? {
    pop
  } if
  checkgeneration {
		% The only global objects we bother to save are
		% (resource) dictionaries.
    1 index dup gcheck exch type /dicttype eq and {
      PDFDEBUG { (%Local=>global: ) print dup == } if
      GlobalObjects 1 index 3 index put
      IsGlobal 1 index 1 put
    } if
    Objects exch 2 index lput
  } {
    pop pop null
  } ifelse
} bind def

% When resolving an object reference in an object stream, we stop at
% the end of file.  Note:  Objects in an object stream do not have either
% a starting 'obj' or and ending 'endobj'.
/resolveobjstreamopdict mark
  valueopdict { } forall
  (%%EOF) cvn { exit } bind
.dicttomark readonly def

% Note: This version of this function is not currently being used.
% Resolve all objects in an object stream
/resolveobjectstream {		% <object stream #> resolveobjectstream -
  PDFDEBUG { (%Resolving object stream: ) print } if
  0 resolveR	% Get the objectstream dict, all objstrms use 0 as the gen #
  dup /First get		% Save location of first object onto the stack
  1 index /N get		% Save number of objects onto the stack
  2 index false resolvestream	% Convert stream dict into a stream
  /ReusableStreamDecode filter	% We need to be able to position stream
		% Objectstreams begin with list of object numbers and locations
		% Create two arrays to hold object numbers and stream location
  1 index array			% Array for holding object number
  2 index array			% Array for holding stream object location
		% Get the object numbers and locations.
  0 1 5 index 1 sub {		% Loop and collect obj # and locations
		% Stack: objstreamdict First N objectstream [obj#] [loc] index
    2 index 1 index 		% Setup to put obj# into object number array
    5 index token pop put	% Get stream, then get obj# and put into array
    1 index 1 index 		% Setup to put object loc into location array
    5 index token pop put	% Get stream, get obj loc and put into array
    pop				% Remove loop index
  } for
  		% Create a bytestring big enough for reading any object data
  		% Scan for the size of the largest object
  0 0				% Init max object size and previous location
  2 index {			% Loop through all object locations
				% Stack:  ... maxsize prevloc currentloc
    dup 4 1 roll		% Save copy of object location into stack
    exch sub				% Object size = currentloc - prevloc
    .max			% Determine maximum object size
    exch			% Put max size under previous location
  } forall
  pop				% Remove previous location
  .bigstring			% Create bytestring based upon max obj size
		% Move to the start of the object data
  3 index 6 index		% Get objectstream and start of first object
  setfileposition		% Move to the start of the data
  		% Read the data for all objects except the last.  We do
		% not know the size of the last object so we need to treat
		% it as a special case.
  0 1 6 index 2 sub {
    dup 4 index exch get 	% Get our current object number
		% Stack: objstreamdict First N objectstream [obj#] [loc]
		%        bytestring loopindex object#
    dup resolved? {		% If we already have this object
    	(yyy) = pstack (yyy) = flush xxx
      pop pop			% Remove object and object number
      1 add 2 index exch get	% Get location of next object
      6 index add 6 index exch	% Form location of next object and get stream
      setfileposition		% Move to the start of the next object data
    } {				% Else this is a new object ...
		% We are going to create a string for reading the object
      2 index 0			% use our working string
	  	% Determine the size of the object
      5 index 4 index 1 add get	% Get location of the next object
      6 index 5 index get	% Get location of this object
      sub			% Size of object = next loc - this loc
      getinterval		% Create string for reading object
      6 index exch readstring pop	% Read object
      /ReusableStreamDecode filter 	% Convert string into a stream
      resolveobjstreamopdict .pdfrun	% Get PDF object
      Objects exch 2 index exch lput	 % Put object into Objects array
      pop pop			% Remove object # and loop index
    } ifelse
  } for
  pop pop			% Remove our working string and loc array
  		% Now read the last object in the object stream.  Since it
		% is the last object, we can use the original stream and
		% terminate when we hit the end of the stream
		% Stack: objstreamdict First N objectstream [obj#]
  2 index 1 sub get	 	% Get our current object number
  dup resolved? not {		% If we do not already have this object
    exch	 		% Get our object stream
    resolveobjstreamopdict .pdfrun	% Get PDF object
    Objects exch 2 index exch lput	% Put object into Objects array
  } if
  pop pop pop pop		% Clear stack
} bind def

% Resolve all objects in an object stream
/resolveobjectstream {		% <object stream #> resolveobjectstream -
  PDFDEBUG { (%Resolving object stream: ) print } if
  0 resolveR	% Get the objectstream dict, all objstrms use 0 as the gen #
  dup /Type get /ObjStm ne {	% Verify type is object stream
    (   **** Incorrect Type in object stream dictionary.\n) pdfformaterror
    /resolveobjectstream cvx /typecheck signalerror
  } if
  dup /N get			% Save number of objects onto the stack
  1 index false resolvestream	% Convert stream dict into a stream
  /ReusableStreamDecode filter	% We need to be able to position stream
		% Objectstreams begin with list of object numbers and locations
  1 index array			% Create array for holding object number
		% Get the object numbers
  0 1 4 index 1 sub {		% Loop and collect obj numbers
		% Stack: objstreamdict N PDFDEBUG objectstream [obj#] loopindex
    1 index 1 index 		% Setup to put obj# into object number array
    4 index token pop put	% Get stream, then get obj# and put into array
    2 index token pop pop pop	% Get stream, get obj loc and clear stack
  } for
		% Move to the start of the object data
  1 index 4 index /First get	% Get objectstream and start of first object
  setfileposition		% Move to the start of the data
  		% We disable PDFDEBUG while reading the data stream.  We will
		% print the data later
  PDFDEBUG /PDFDEBUG false def	% Save PDFDEBUG and disable it while reading
  		% Read the data for all objects.  We check to see if we get
		% the number of objects that we expect.
		% Stack: objstreamdict N objectstream [obj#] PDFDEBUG
  mark 4 -1 roll 		% Get objectstream
  count 5 index add		% Determine stack depth with objects
  /PDFObjectStkCount exch def
  resolveobjstreamopdict .pdfrun % Get PDF objects
  PDFObjectStkCount count ne {	% Check stack depth
    (   **** Incorrect object count in object stream.\n) pdfformaterror
    /resolveobjectstream cvx /rangecheck signalerror
  } if
  		% We have the object data
  counttomark array astore	% Put objects into an array
  exch pop			% Remove mark
  exch /PDFDEBUG exch def	% Restore PDFDEBUG flag
		% Save the objects into Objects
  0 1 2 index length 1 sub {	% Loop through all objects
		% Stack: objstreamdict N [obj#] [objects] loopindex
    dup 3 index exch get 	% Get our current object number
    dup resolved? {		% If we already have this object
      pop pop			% Remove object and object number
    } {				% Else if we do not have this object
      PDFDEBUG { (%Resolving compressed object: [) print dup =only ( 0]) = } if
      Objects exch 3 index	% Put the object into Objects
      3 index get
      PDFDEBUG { dup === flush } if
      lput
    } ifelse
    pop 			% Remove loop index
  } for
  pop pop pop pop		% Remove objstream, N, (obj#], and [objects]
} bind def

% When resolving an object reference, we stop at the endobj or endstream.
/resolveopdict mark
  valueopdict { } forall
  /endstream { endobj exit } bind
  /endobj { endobj exit } bind
                % OmniForm generates PDF file with endobj missing in some
                % objects. AR ignores this. So we have to do it too.
  /obj { pop pop endobj exit } bind
.dicttomark readonly def

/resolveR {		% <object#> <generation#> resolveR <object>
  PDFDEBUG {
    PDFSTEPcount 1 le {
      (%Resolving: ) print 2 copy 2 array astore ==
    } if
  } if
  1 index resolved? {		% If object has already been resolved ...
    exch pop exch pop		% then clear stack and return object
  } {				% Else if not resolved ...
    PDFfile fileposition 3 1 roll	% Save current file position
    1 index Objects exch lget		% Get location of object from xref
    3 1 roll checkgeneration {		% Verify the generation number
			% Stack: savepos objpos obj#
       ObjectStream 1 index lget dup 0 eq { % Check if obj in not an objstream
	 pop exch PDFoffset add PDFfile exch setfileposition
	 PDFfile token pop 2 copy ne
	  { (   **** Unrecoverable error in xref!\n) pdfformaterror
	    /resolveR cvx /rangecheck signalerror
	  }
	 if pop PDFfile token pop
	 PDFfile token pop /obj ne
	  { (   **** Unrecoverable error in xref!\n) pdfformaterror
	    /resolveR cvx /rangecheck signalerror
	  }
	 if
	 pdf_run_resolve	% PDFfile resolveopdict .pdfrun
      } {			% Else the object is in an ObjectStream
	      	% Process an objectstream object.  We are going to resolve all
	      	% of the objects in sthe stream and place them into the Objects
	      	% array.
		% Stack: savepos objpos obj# objectstream#
	resolveobjectstream
        resolved? {		% If object has already been resolved ...
	  exch pop		% Remove object pos from stack.
	} {
	  pop pop null		% Pop objpos and obj#, put null for object
	} ifelse
      } ifelse
    } {				% Else the generation number is wrong
	    % Don't cache if the generation # is wrong.
	pop pop null		% Pop objpos and obj#, put null for object
    } ifelse			% ifelse generation number is correct
    exch PDFfile exch setfileposition	% Return to original file position
  } ifelse
} bind def      

% ================================ Streams ================================ %

% We represent a stream by an executable dictionary that contains,
% in addition to the contents of the original stream dictionary:
%	/File - the file or string where the stream contents are stored,
%	  if the stream is not an external one.
%	/FilePosition - iff File is a file, the position in the file
%	  where the contents start.
%	/StreamKey - the key used to decrypt this stream, if any.
% We do the real work of constructing the data stream only when the
% contents are needed.

% Construct a stream.  The length is not reliable in the face of
% different end-of-line conventions, but it's all we've got.
%
% PDF files are inconsistent about what may fall between the 'stream' keyword
% and the actual stream data, and it appears that no one algorithm can
% detect this reliably.  We used to try to guess whether the file included
% extraneous \r and/or \n characters, but we no longer attempt to do so,
% especially since the PDF 1.2 specification states flatly that the only
% legal terminators following the 'stream' keyword are \n or \r\n, both of
% which are properly skipped and discarded by the token operator.
% Unfortunately, this doesn't account for other whitespace characters that
% may have preceded the EOL, such as spaces or tabs. Thus we back up one
% character and scan until we find the \n terminator.
/stream {	% <dict> stream <modified_dict>
  dup /Length oget 0 eq {
    dup /Filter undef	% don't confuse any filters that require data
  } if
  dup /F known dup PDFsource PDFfile eq or {
    not {
      dup /File PDFfile put
      % make sure that we are just past the EOL \n character
      PDFfile dup fileposition 1 sub setfileposition	% back up one
      { PDFfile read pop dup 13 eq {
	  % If there had been a \n, token would have advanced over it
	  % thus, if the terminator was \r, we have a format error!
	  (   **** Warning: stream operator not terminated by valid EOL.\n) pdfformaterror
	  pop exit	% fileposition is OK (just past the \r).
	} if 
	10 eq { exit } if
      } loop	% scan past \n
      dup /FilePosition PDFfile fileposition put
      PDFDEBUG {
        PDFSTEPcount 1 le {
          (%FilePosition: ) print dup /FilePosition get ==
        } if
      } if
    } if
    % Some (bad) PDf files have invalid stream lengths.  This causes problems
    % if we reposition beyond the end of the file.  So we compare the given
    % length to number of bytes left in the file.
    dup /Length oget 
    dup PDFfile bytesavailable lt {	% compare to to bytes left in file
      PDFfile fileposition 		% reposition to the end of stream
      add PDFfile exch setfileposition
    } {
      pop				% bad stream length - do not reposition.
      					% This will force a length warning below
    } ifelse
  } {
    pop
	% We're already reading from a stream, which we can't reposition.
	% Capture the sub-stream contents in a string.
    dup /Length oget string PDFsource exch readstring
    not {
      (   **** Warning: Unexpected EOF in stream!\n) pdfformaterror
      /stream cvx /rangecheck signalerror
    } if
    1 index exch /File exch put
  } ifelse
  PDFsource {token} stopped {
    pop null
  } {
    not { null } if
  } ifelse
  dup /endobj eq {
    % Another case that Acrobat Reader handles -- 'endobj' without 'endstream'.
    (   **** Warning: stream missing 'endstream'.\n) pdfformaterror
    pop /endstream		% fake a valid endstream
  } if
  /endstream ne { 
    (   **** Warning: stream Length incorrect.\n) pdfformaterror
    dup /Length undef % prevent the use of the incorrect length.
    cvx endobj exit   % exit from .pdfrun now.
  } if
  cvx
} bind def
/endstream {
  exit
} bind def

% Contrary to the published PDF (1.3) specification, Acrobat Reader
% accepts abbreviated filter names everywhere, not just for in-line images,
% and some applications (notably htmldoc) rely on this.
/unabbrevfilterdict mark
  /AHx /ASCIIHexDecode  /A85 /ASCII85Decode  /CCF /CCITTFaxDecode
  /DCT /DCTDecode  /Fl /FlateDecode  /LZW /LZWDecode  /RL /RunLengthDecode
.dicttomark readonly def

% Extract and apply filters.
/filterparms {		% <dict> <DPkey> <Fkey> filterparms
			%   <dict> <parms> <filternames>
  2 index exch knownoget {
    exch 2 index exch knownoget {
		% Both filters and parameters.
      exch dup type /nametype eq {
	1 array astore exch
	dup type /arraytype ne { 1 array astore } if exch
      } if
    } {
		% Filters, but no parameters.
      null exch
      dup type /nametype eq { 1 array astore } if
    } ifelse
  } {
		% No filters: ignore parameters, if any.
    pop null { }
  } ifelse
} bind def
/filtername {		% <filtername> filtername <filtername'>
  //unabbrevfilterdict 1 index .knownget { exch pop } if
  dup /Filter resourcestatus { pop pop } {
    Repaired exch	% this error is not the creator's fault	
    (   **** ERROR: Unable to process ) pdfformaterror
    64 string cvs pdfformaterror
    ( data. Page will be missing data.\n) pdfformaterror
    /Repaired exch store % restore the previous "Repaired" state
    % provide a filter that returns EOF (no data)
    /.EOFDecode
  } ifelse
} bind def
/applyfilters {		% <parms> <source> <filternames> applyfilters <stream>
  2 index null eq {
    { filtername filter }
  } {
    {		% Stack: parms source filtername
      2 index 0 oget dup null eq { pop } {
        exch filtername dup /JBIG2Decode eq { exch jbig2cachectx exch } if
      } ifelse filter
      exch dup length 1 sub 1 exch getinterval exch
    }
  } ifelse forall exch pop
} bind def

% JBIG2 streams have an optional 'globals' stream obj for
% sharing redundant data between page images. Here we resolve
% that stream reference (if any) and run it through the decoder,
% creating a special -jbig2globalctx- postscript object our
% JBIG2Decode filter implementation looks for in the parm dict.
/jbig2cachectx { % <parmdict> jbig2cachectx <parmdict>
  dup /JBIG2Globals knownoget {
    dup /Length oget
    % make global ctx
    PDFfile fileposition 3 1 roll % resolvestream is not reentrant
    exch true resolvestream exch .bytestring
    .readbytestring pop .jbig2makeglobalctx
    PDFfile 3 -1 roll setfileposition
    1 index exch
    /.jbig2globalctx exch put
  } if
} bind def

% Resolve a stream dictionary to a PostScript stream.
% Streams with no filters require special handling:
%     - Whether we are going to interpret the stream, or If we are just
%       going to read data from them, we impose a SubFileDecode filter
%         that reads just the requisite amount of data.
% Note that, in general, resolving a stream repositions PDFfile.
% Clients must save and restore the position of PDFfile themselves.
/resolvestream {	% <streamdict> <readdata?> resolvestream <stream>
  1 index /F knownoget {
		% This stream is stored on an external file.
    (r) file 3 -1 roll
    /FDecodeParms /FFilter filterparms
		% Stack: readdata? file dict parms filternames
    4 -1 roll exch
    pdf_decrypt_stream
    applyfilters
  } {
    exch dup /FilePosition .knownget {
      1 index /File get exch setfileposition
    } if
		% Stack: readdata? dict
    /DecodeParms /Filter filterparms
		% Stack: readdata? dict parms filternames
    2 index /File get exch
		% Stack: readdata? dict parms file/string filternames
    pdf_decrypt_stream		% add decryption if needed
    dup length 0 eq {
		% All the PDF filters have EOD markers, but in this case
		% there is no specified filter.
      pop exch pop
		% Stack: readdata? dict file/string
      2 index 1 index type /filetype eq or {
              % Use length for any files or reading data from any source.
        1 index /Length knownoget not { 0 } if
      } {
      0       % Otherwise length of 0 for whole string
      } ifelse
      2 index /IDFlag known { pop } { () /SubFileDecode filter } ifelse
    } {
      applyfilters
    } ifelse
  } ifelse
		% Stack: readdata? dict file
  exch pop exch pop
} bind def

% ============================ Name/number trees ============================ %

/nameoget {		% <nametree> <key> nameoget <obj|null>
  exch /Names exch .treeget
} bind def

/numoget {		% <numtree> <key> numoget <obj|null>
  exch /Nums exch .treeget
} bind def

/.treeget {		% <key> <leafkey> <tree> .treeget <obj|null>
  dup /Kids knownoget {
    exch pop .branchget
  } {
    exch get .leafget
  } ifelse
} bind def

/.branchget {		%  <key> <leafkey> <kids> .branchget <obj|null>
  dup length 0 eq {
    pop pop pop null
  } {
    dup length -1 bitshift 2 copy oget
			% Stack: key leafkey kids mid kids[mid]
    dup /Limits oget aload pop
			% Stack: key leafkey kids mid kids[mid] min max
    6 index lt {
      pop pop
      1 add 1 index length 1 index sub getinterval .branchget
    } {
      5 index gt {
	pop
	0 exch getinterval .branchget
      } {
	exch pop exch pop .treeget
      } ifelse
    } ifelse
  } ifelse
} bind def

/.leafget {		% <key> <pairs> .leafget <obj|null>
  dup length 2 eq {
    dup 0 get 2 index eq { 1 oget } { pop null } ifelse
    exch pop
  } {
    dup length -1 bitshift -2 and 2 copy oget
			% Stack: key pairs mid pairs[mid]
    3 index gt { 0 exch } { 1 index length 1 index sub } ifelse
    getinterval .leafget
  } ifelse
} bind def

end			% pdfdict
.setglobal

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to webmaster@9p.io.