Object
Creates a new PDF and saves it. If a block is passed, the PDF instance can be processed before saving.
# File lib/origami/pdf.rb, line 130
#
# Builds a new PDF, yields it to the optional block so the caller can
# populate it, then saves it.
#
# output::  Destination handed to PDF#save.
# options:: Options hash handed to PDF#save.
#
# Returns whatever PDF#save returns.
def create(output, options = {})
  document = PDF.new
  yield document if block_given?
  document.save(output, options)
end
Deserializes a PDF dump.
# File lib/origami/pdf.rb, line 140
#
# Deserializes a PDF dump, i.e. a gzip-compressed Marshal dump stored in
# +filename+.
#
# filename:: Path of the dump to read.
#
# Returns the object rebuilt by Marshal.load.
#
# SECURITY: Marshal.load can instantiate arbitrary objects. Only feed this
# method dumps coming from a trusted source.
def deserialize(filename)
  # Declared outside the block on purpose: a variable first assigned inside
  # the block would be block-local and invisible once the block returns.
  pdf = nil
  Zlib::GzipReader.open(filename) { |gz| pdf = Marshal.load(gz.read) }
  pdf
end
Creates a new PDF instance.
parser |
The Parser object creating the document. If none is specified, some default structures are automatically created to get a minimal working document. |
# File lib/origami/pdf.rb, line 153
#
# Sets up the header and a first revision carrying a fresh Trailer.
#
# parser:: The Parser creating the document. When omitted, #init is called
#          instead of recording a parser.
def initialize(parser = nil)
  @header = PDF::Header.new
  @revisions = []

  add_new_revision
  @revisions.first.trailer = Trailer.new

  # Without a parser the document is initialised through #init.
  return init unless parser

  @parser = parser
end
Reads and parses a PDF file from disk.
# File lib/origami/pdf.rb, line 121
#
# Parses a PDF with PDF::LinearParser.
#
# filename:: A path (expanded when given as a String) or any other object
#            the parser accepts; non-String values are passed through as is.
# options::  Options hash forwarded to the parser constructor.
#
# Returns the result of LinearParser#parse.
def read(filename, options = {})
  source = filename.is_a?(::String) ? File.expand_path(filename) : filename
  parser = PDF::LinearParser.new(options)
  parser.parse(source)
end
Adds a new object to the PDF file. If this object has no version number, a new one is automatically computed and assigned to it. Returns a Reference to this Object.
object |
The object to add. |
# File lib/origami/pdf.rb, line 449
#
# Adds +object+ to the document.
#
# An object already owned by a different PDF goes through #import;
# anything else is added to the last revision.
#
# Returns the value of #import or #add_to_revision respectively.
def <<(object)
  owner = object.pdf

  # Does the object belong to another PDF?
  foreign = owner && !owner.equal?(self)
  return import(object) if foreign

  add_to_revision(object, @revisions.last)
end
Returns the current Catalog Dictionary.
# File lib/origami/catalog.rb, line 46
#
# Returns the current Catalog, read from the :Root document attribute.
#
# A plain Dictionary root is wrapped into a new Catalog that takes over the
# dictionary's object number and generation, is flagged indirect and is
# attached to this document.
#
# Raises InvalidPDFError when the root is neither a Catalog nor a Dictionary.
def Catalog
  root = get_doc_attr(:Root)

  case root
  when Catalog
    return root
  when Dictionary
    # Fall through to the wrapping code below.
  else
    raise InvalidPDFError, "Broken catalog"
  end

  wrapped = Catalog.new(root)
  wrapped.no, wrapped.generation = root.no, root.generation
  wrapped.set_indirect(true)
  wrapped.set_pdf(self)
  wrapped
end
Sets the current Catalog Dictionary.
# File lib/origami/catalog.rb, line 67
#
# Sets the current Catalog.
#
# cat:: A Catalog, or any value accepted by Catalog.new (it is wrapped).
#
# Any Root already referenced by the last revision's trailer is deleted
# before the new catalog is added and referenced.
def Catalog=(cat)
  catalog = cat.is_a?(Catalog) ? cat : Catalog.new(cat)
  trailer = @revisions.last.trailer

  delete_object(trailer[:Root]) if trailer.Root

  trailer.Root = self << catalog
end
Adds one or more fields to the Acrobat form.
field |
The Field to add. |
# File lib/origami/acroform.rb, line 51
#
# Appends +fields+ to the AcroForm's Fields array, creating the AcroForm
# and the array when they are missing, and flags every field as indirect.
#
# fields:: One or more Field instances.
#
# Raises TypeError if any argument is not a Field. Returns self.
def add_fields(*fields)
  if fields.any? { |candidate| !candidate.is_a?(Field) }
    raise TypeError, "Expected Field arguments"
  end

  self.Catalog.AcroForm ||= InteractiveForm.new.set_indirect(true)
  self.Catalog.AcroForm.Fields ||= []
  self.Catalog.AcroForm.Fields.concat(fields)

  fields.each { |added| added.set_indirect(true) }

  self
end
Ends the current Revision, and starts a new one.
# File lib/origami/pdf.rb, line 495
#
# Ends the current revision and opens a new one.
#
# The new revision gets a fresh Trailer whose Root is copied from the
# previous revision's trailer (nil when there was no previous revision).
#
# Returns self.
def add_new_revision
  previous_root = @revisions.empty? ? nil : @revisions.last.trailer[:Root]

  @revisions << Revision.new(self)

  newest = @revisions.last
  newest.trailer = Trailer.new
  newest.trailer.Root = previous_root

  self
end
Adds a new object to a specific revision. If this object has no version number, a new one is automatically computed and assigned to it. Returns a Reference to this Object.
object |
The object to add. |
revision |
The revision to add the object to. |
# File lib/origami/pdf.rb, line 480
#
# Registers +object+ in the body of +revision+.
#
# The object is flagged indirect and attached to this document. When its
# number is 0, a number/generation pair is taken from
# #alloc_new_object_number.
#
# Returns the object's reference.
def add_to_revision(object, revision)
  object.set_indirect(true)
  object.set_pdf(self)

  if object.no == 0
    number, generation = alloc_new_object_number
    object.no = number
    object.generation = generation
  end

  revision.body[object.reference] = object

  object.reference
end
Returns a new number/generation for future object.
# File lib/origami/pdf.rb, line 627
#
# Returns a [number, generation] pair for a future object.
#
# The number is one more than the highest number found among the indirect
# objects and the objects held in object streams, or 1 when none is
# numbered. The generation is always 0.
def alloc_new_object_number
  # Deprecated number allocation policy (first available)
  #no = no + 1 while get_object(no)

  candidates = self.indirect_objects
  streams = self.indirect_objects.find_all { |entry| entry.is_a?(ObjectStream) }
  streams.each do |stream|
    stream.each { |embedded| candidates << embedded }
  end

  numbers = candidates.collect { |entry| entry.no }.compact
  next_number = numbers.empty? ? 1 : numbers.max + 1

  [ next_number, 0 ]
end
Appends a page or list of pages to the end of the page tree.
# File lib/origami/page.rb, line 33
#
# Appends one or more pages at the end of the root page tree node.
#
# page:: First page to append (a new Page by default).
# more:: Additional pages.
#
# Pages attached to another document are exported first. The root node's
# Kids and Count are updated and each page's Parent is set to the root.
#
# Raises InvalidPDFError if the catalog has no valid page tree.
# Returns self.
def append_page(page = Page.new, *more)
  unless self.Catalog and self.Catalog.Pages and self.Catalog.Pages.is_a?(PageTreeNode)
    raise InvalidPDFError, "Invalid page tree"
  end

  new_pages = ([ page ] + more).map do |candidate|
    # Page from another document must be exported.
    (candidate.pdf and candidate.pdf != self) ? candidate.export : candidate
  end

  tree = self.Catalog.Pages
  tree.Kids ||= [] #:nodoc:
  tree.Kids.concat(new_pages)
  tree.Count = tree.Kids.length

  new_pages.each { |added| added.Parent = tree }

  self
end
# File lib/origami/pdf.rb, line 397
#
# Recursively collects +root+ and the objects reachable from it into
# +objset+, skipping anything already present (identity comparison).
#
# opts:: Hash read for :only_keys (collect dictionary keys only),
#        :include_keys (also walk keys) and :include_objectstreams
#        (walk into ObjectStream instances).
def append_subobj(root, objset, opts)
  # Already visited: nothing to do.
  return unless objset.find { |known| root.equal?(known) }.nil?

  objset << root unless opts[:only_keys]

  if root.is_a?(Dictionary)
    root.each_pair do |name, value|
      objset << name if opts[:only_keys]
      append_subobj(name, objset, opts) if opts[:include_keys] and not opts[:only_keys]
      append_subobj(value, objset, opts)
    end
  elsif root.is_a?(Array) or (root.is_a?(ObjectStream) and opts[:include_objectstreams])
    root.each { |child| append_subobj(child, objset, opts) }
  end
end
Attaches an embedded file to the PDF.
path |
The path to the file to attach. |
options |
A set of options to configure the attachment. |
# File lib/origami/file.rb, line 35
#
# Attaches an embedded file to the PDF.
#
# path::    A FileSpec, an object responding to #read, or a filesystem path.
# options:: Overrides for the defaults below (:Register, :EmbeddedName,
#           :Filter).
#
# Returns the FileSpec created for the attachment. It is also registered
# under Names::Root::EMBEDDEDFILES when :Register is true.
#
# Note: an IO-like +path+ is closed by this method once it has been read.
def attach_file(path, options = {})
  #
  # Default options.
  #
  params = {
    :Register => true,          # Shall the file be registered in the name directory ?
    :EmbeddedName => nil,       # The inner filename of the attachment.
    :Filter => :FlateDecode,    # The stream filter used to store data.
  }.update(options)

  if path.is_a? FileSpec
    filespec = path
    params[:EmbeddedName] ||= ''
  else
    if path.respond_to?(:read)
      fd = path
      params[:EmbeddedName] ||= ''
    else
      fd = File.open(File.expand_path(path), 'r').binmode
      params[:EmbeddedName] ||= File.basename(path)
    end

    # Close the handle even when building the stream or reading fails, so a
    # failing attachment does not leak the file descriptor.
    begin
      fstream = EmbeddedFileStream.new
      data = fd.read
    ensure
      fd.close
    end

    if ''.respond_to? :force_encoding
      fstream.data = data.force_encoding('binary') # 1.9
    else
      fstream.data = data
    end

    fstream.setFilter(params[:Filter])
    filespec = FileSpec.new(:F => fstream)
  end

  name = params[:EmbeddedName]
  fspec = FileSpec.new.setType(:Filespec).setF(name.dup).setEF( filespec )

  register( Names::Root::EMBEDDEDFILES, name.dup, fspec ) if params[:Register] == true

  fspec
end
Creates a new AcroForm with specified fields.
# File lib/origami/acroform.rb, line 40
#
# Ensures the catalog has an AcroForm (creating an indirect InteractiveForm
# when missing) and adds +fields+ to it through #add_fields.
#
# Returns the AcroForm.
def create_acroform(*fields)
  form = (self.Catalog.AcroForm ||= InteractiveForm.new.set_indirect(true))

  self.add_fields(*fields)

  form
end
Modifies or creates a metadata stream.
# File lib/origami/metadata.rb, line 96
#
# Modifies the catalog's metadata stream, or creates one from a minimal
# XMP skeleton when the catalog has none.
#
# info:: Hash of name => value pairs; each pair is appended to the first
#        rdf:Description element as a <pdf:NAME> child element.
#
# Returns the catalog's Metadata entry.
def create_metadata(info = {})
  # An XMP packet header is the `xpacket` processing instruction (the
  # skeleton previously opened with `<?packet`).
  skeleton = <<-XMP
<?xpacket begin="#{"\xef\xbb\xbf"}" id="W5M0MpCehiHzreSzNTczkc9d"?>
  <x:xmpmeta xmlns:x="adobe:ns:meta/">
    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
      <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
      </rdf:Description>
    </rdf:RDF>
  </x:xmpmeta>
<?xpacket end="w"?>
  XMP

  xml =
    if self.Catalog.Metadata.is_a?(Stream)
      self.Catalog.Metadata.data
    else
      skeleton
    end

  doc = REXML::Document.new(xml)
  desc = doc.elements['*/*/rdf:Description']

  info.each do |name, value|
    elt = REXML::Element.new "pdf:#{name}"
    elt.text = value

    desc.elements << elt
  end

  # Serialise the document into a String buffer with an indent of 3.
  xml = ""; doc.write(xml, 3)

  if self.Catalog.Metadata.is_a?(Stream)
    self.Catalog.Metadata.data = xml
  else
    self.Catalog.Metadata = Stream.new(xml)
  end

  self.Catalog.Metadata
end
# File lib/origami/xfa.rb, line 32
#
# Creates (or reuses) the AcroForm via #create_acroform and stores +xdp+ in
# its XFA entry as a FlateDecode-filtered Stream.
#
# Returns the AcroForm.
def create_xfa_form(xdp, *fields)
  form = create_acroform(*fields)
  form.XFA = Stream.new(xdp, :Filter => :FlateDecode)

  form
end
# File lib/origami/metadata.rb, line 59
#
# Returns the :CreationDate field of the document information dictionary,
# as looked up by #get_document_info_field.
def creation_date
  get_document_info_field(:CreationDate)
end
# File lib/origami/metadata.rb, line 57
#
# Returns the :Creator field of the document information dictionary, as
# looked up by #get_document_info_field.
def creator
  get_document_info_field(:Creator)
end
Decrypts the current document. Only the Standard security handler is supported (RC4 for versions 1 and 2; RC4 or AES crypt filters for versions 4 and 5).
passwd |
The password to decrypt the document. |
# File lib/origami/encryption.rb, line 59
#
# Decrypts the current document.
#
# passwd:: The user or owner password (empty String by default).
#
# Raises EncryptionError when the document is not encrypted or has no
# usable document ID, EncryptionNotSupportedError for an unknown security
# handler, version or crypt filter method, and
# EncryptionInvalidPasswordError when +passwd+ matches neither password.
#
# Returns self.
def decrypt(passwd = "")
  unless self.is_encrypted?
    raise EncryptionError, "PDF is not encrypted"
  end

  # Work on a copy of the Encrypt dictionary wrapped as a standard handler.
  encrypt_dict = get_doc_attr(:Encrypt)
  handler = Encryption::Standard::Dictionary.new(encrypt_dict.dup)

  unless handler.Filter == :Standard
    raise EncryptionNotSupportedError, "Unknown security handler : '#{handler.Filter.to_s}'"
  end

  # Select the string and stream algorithms from the handler version.
  case handler.V.to_i
  when 1,2 then str_algo = stm_algo = Encryption::ARC4
  when 4,5
    # Versions 4 and 5 name their algorithms through crypt filters (CF).
    if handler[:CF].is_a?(Dictionary)
      cfs = handler[:CF]

      # String crypt filter; Identity when StrF does not name a filter.
      if handler[:StrF].is_a?(Name) and cfs[handler[:StrF]].is_a?(Dictionary)
        cfdict = cfs[handler[:StrF]]

        str_algo =
          if cfdict[:CFM] == :V2 then Encryption::ARC4
          elsif cfdict[:CFM] == :AESV2 then Encryption::AES
          elsif cfdict[:CFM] == :None then Encryption::Identity
          elsif cfdict[:CFM] == :AESV3 and handler.V.to_i == 5 then Encryption::AES
          else
            raise EncryptionNotSupportedError, "Unsupported encryption version : #{handler.V}"
          end
      else
        str_algo = Encryption::Identity
      end

      # Stream crypt filter; same selection rules as for strings.
      if handler[:StmF].is_a?(Name) and cfs[handler[:StmF]].is_a?(Dictionary)
        cfdict = cfs[handler[:StmF]]

        stm_algo =
          if cfdict[:CFM] == :V2 then Encryption::ARC4
          elsif cfdict[:CFM] == :AESV2 then Encryption::AES
          elsif cfdict[:CFM] == :None then Encryption::Identity
          elsif cfdict[:CFM] == :AESV3 and handler.V.to_i == 5 then Encryption::AES
          else
            raise EncryptionNotSupportedError, "Unsupported encryption version : #{handler.V}"
          end
      else
        stm_algo = Encryption::Identity
      end
    else
      str_algo = stm_algo = Encryption::Identity
    end
  else
    raise EncryptionNotSupportedError, "Unsupported encryption version : #{handler.V}"
  end

  # A missing or non-Array ID is only tolerated for version 5; in that case
  # doc_id keeps whatever get_doc_attr returned.
  doc_id = get_doc_attr(:ID)
  unless doc_id.is_a?(Array)
    raise EncryptionError, "Document ID was not found or is invalid" unless handler.V.to_i == 5
  else
    doc_id = doc_id.first
  end

  # Try the password as user password first, then as owner password.
  if handler.is_user_password?(passwd, doc_id)
    encryption_key = handler.compute_user_encryption_key(passwd, doc_id)
  elsif handler.is_owner_password?(passwd, doc_id)
    if handler.V.to_i < 5
      user_passwd = handler.retrieve_user_password(passwd)
      encryption_key = handler.compute_user_encryption_key(user_passwd, doc_id)
    else
      encryption_key = handler.compute_owner_encryption_key(passwd)
    end
  else
    raise EncryptionInvalidPasswordError
  end

  #self.extend(Encryption::EncryptedDocument)
  #self.encryption_dict = encrypt_dict
  #self.encryption_key = encryption_key
  #self.stm_algo = self.str_algo = algorithm

  # Metadata counts as encrypted unless EncryptMetadata is explicitly false.
  encrypt_metadata = (handler.EncryptMetadata != false)

  self.extend(Encryption::EncryptedDocument)
  self.encryption_dict = handler
  self.encryption_key = encryption_key
  self.stm_algo,self.str_algo = stm_algo,str_algo

  #
  # Should be fixed to exclude only the active XRefStream
  #
  metadata = self.Catalog.Metadata

  self.indirect_objects.each do |indobj|
    # Gather the strings/streams to process for this indirect object.
    encrypted_objects = []
    case indobj
      when String,Stream then encrypted_objects << indobj
      when Dictionary,Array then encrypted_objects |= indobj.strings_cache
    end

    encrypted_objects.each do |obj|
      case obj
        when String
          # Skip the handler's own entries of the Encrypt dictionary and the
          # Contents of digital signatures.
          next if obj.equal?(encrypt_dict[:U]) or
                  obj.equal?(encrypt_dict[:O]) or
                  obj.equal?(encrypt_dict[:UE]) or
                  obj.equal?(encrypt_dict[:OE]) or
                  obj.equal?(encrypt_dict[:Perms]) or
                  (obj.parent.is_a?(Signature::DigitalSignature) and obj.equal?(obj.parent[:Contents]))

          obj.extend(Encryption::EncryptedString) unless obj.is_a?(Encryption::EncryptedString)
          obj.encryption_handler = handler
          obj.encryption_key = encryption_key
          obj.algorithm = str_algo
          obj.decrypt!

        when Stream
          # XRef streams are skipped, as is the metadata stream when it is
          # not encrypted. Streams only receive handler, key and algorithm
          # here; no decrypt! call is made in this method.
          next if obj.is_a?(XRefStream) or (not encrypt_metadata and obj.equal?(metadata))
          obj.extend(Encryption::EncryptedStream) unless obj.is_a?(Encryption::EncryptedStream)
          obj.encryption_handler = handler
          obj.encryption_key = encryption_key
          obj.algorithm = stm_algo
      end
    end
  end

  self
end
Remove an object.
# File lib/origami/pdf.rb, line 533
#
# Removes an object from the body of every revision.
#
# no::         A Reference, or an object number.
# generation:: Generation used to build the Reference when +no+ is a number.
#
# Raises TypeError for any other kind of +no+.
def delete_object(no, generation = 0)
  target =
    case no
    when Reference then no
    when ::Integer then Reference.new(no, generation)
    else
      raise TypeError, "Invalid parameter type : #{no.class}"
    end

  @revisions.each { |revision| revision.body.delete(target) }
end
# File lib/origami/xreftable.rb, line 34
#
# Deletes +xrefstm+ and then walks back through its Prev offsets, deleting
# each earlier XRefStream found, until Prev is not an Integer or does not
# resolve to an XRefStream.
def delete_xrefstm(xrefstm)
  current = xrefstm

  while true
    # Read Prev before deleting the stream it belongs to.
    offset = current.Prev
    delete_object(current.reference)

    break unless offset.is_a?(Integer)

    previous = get_object_by_offset(offset)
    break unless previous.is_a?(XRefStream)

    current = previous
  end
end
Tries to delinearize the document if it has been linearized. This operation destroys the existing xref tables; it should be fixed in the future to merge them instead.
# File lib/origami/linearization.rb, line 50 def delinearize! raise LinearizationError, 'Not a linearized document' unless is_linearized? # # Saves the first trailer. # prev_trailer = @revisions.first.trailer lin_dict = @revisions.first.objects.first hints = lin_dict[:H] # # Removes hint streams used by linearization. # if hints.is_a?(::Array) if hints.length > 0 and hints[0].is_a?(Integer) hint_stream = get_object_by_offset(hints[0]) delete_object(hint_stream.reference) if hint_stream.is_a?(Stream) end if hints.length > 2 and hints[2].is_a?(Integer) overflow_stream = get_object_by_offset(hints[2]) delete_object(overflow_stream.reference) if overflow_stream.is_a?(Stream) end end # # Update the trailer. # last_trailer = (@revisions.last.trailer ||= Trailer.new) last_trailer.dictionary ||= Dictionary.new if prev_trailer.has_dictionary? last_trailer.dictionary = last_trailer.dictionary.merge(prev_trailer.dictionary) else xrefstm = get_object_by_offset(last_trailer.startxref) raise LinearizationError, 'Cannot find trailer info while delinearizing document' unless xrefstm.is_a?(XRefStream) last_trailer.dictionary[:Root] = xrefstm[:Root] last_trailer.dictionary[:Encrypt] = xrefstm[:Encrypt] last_trailer.dictionary[:Info] = xrefstm[:Info] last_trailer.dictionary[:ID] = xrefstm[:ID] end # # Remove all xrefs. # Fix: Should be merged instead. # remove_xrefs # # Remove the linearization revision. # remove_revision(0) self end
Iterates over each Acroform Field.
# File lib/origami/acroform.rb, line 77
#
# Calls the block with each solved entry of the AcroForm's :Fields array.
# Does nothing when the document has no form or the form has no :Fields.
def each_field(&b)
  return unless self.has_form?
  return unless self.Catalog.AcroForm.has_key?(:Fields)

  self.Catalog.AcroForm[:Fields].each do |entry|
    b.call(entry.solve)
  end
end
# File lib/origami/catalog.rb, line 159
#
# Hands the names root found for +root+ to #each_name_from_node together
# with the given block.
#
# Returns self, or nil when #get_names_root finds no names root.
def each_name(root, &b)
  node = get_names_root(root)

  unless node.nil?
    each_name_from_node(node, [], &b)
    self
  end
end
Calls block for each named destination.
# File lib/origami/destinations.rb, line 40
#
# Forwards the block to #each_name for the Names::Root::DESTS directory.
def each_named_dest(&b)
  directory = Names::Root::DESTS
  each_name(directory, &b)
end
Calls block for each named embedded file.
# File lib/origami/file.rb, line 97
#
# Forwards the block to #each_name for the Names::Root::EMBEDDEDFILES
# directory.
def each_named_embedded_file(&b)
  directory = Names::Root::EMBEDDEDFILES
  each_name(directory, &b)
end
Calls block for each named page.
# File lib/origami/page.rb, line 109
#
# Forwards the block to #each_name for the Names::Root::PAGES directory.
def each_named_page(&b)
  directory = Names::Root::PAGES
  each_name(directory, &b)
end
Calls block for each named JavaScript script.
# File lib/origami/actions.rb, line 40
#
# Forwards the block to #each_name for the Names::Root::JAVASCRIPT
# directory.
def each_named_script(&b)
  directory = Names::Root::JAVASCRIPT
  each_name(directory, &b)
end
Iterate through each page, returns self.
# File lib/origami/page.rb, line 83
#
# Passes the block to the root page tree node's #each_page.
#
# Raises InvalidPDFError if the catalog has no valid page tree.
# Returns self.
def each_page(&b)
  unless self.Catalog and self.Catalog.Pages and self.Catalog.Pages.is_a?(PageTreeNode)
    raise InvalidPDFError, "Invalid page tree"
  end

  self.Catalog.Pages.each_page(&b)

  self
end
Enable the document Usage Rights.
rights |
list of rights defined in UsageRights::Rights |
# File lib/origami/signature.rb, line 287 def enable_usage_rights(cert, pkey, *rights) unless Origami::OPTIONS[:use_openssl] fail "OpenSSL is not present or has been disabled." end signfield_size = lambda{|crt, key, ca| datatest = "abcdefghijklmnopqrstuvwxyz" OpenSSL::PKCS7.sign(crt, key, datatest, ca, OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der.size + 128 } # # Load key pair # key = pkey.is_a?(OpenSSL::PKey::RSA) ? pkey : OpenSSL::PKey::RSA.new(pkey) certificate = cert.is_a?(OpenSSL::X509::Certificate) ? cert : OpenSSL::X509::Certificate.new(cert) # # Forge digital signature dictionary # digsig = Signature::DigitalSignature.new.set_indirect(true) self.Catalog.AcroForm ||= InteractiveForm.new #self.Catalog.AcroForm.SigFlags = InteractiveForm::SigFlags::APPENDONLY digsig.Type = :Sig #:nodoc: digsig.Contents = HexaString.new("\x00" * signfield_size[certificate, key, []]) #:nodoc: digsig.Filter = Name.new("Adobe.PPKLite") #:nodoc: digsig.Name = "ARE Acrobat Product v8.0 P23 0002337" #:nodoc: digsig.SubFilter = Name.new("adbe.pkcs7.detached") #:nodoc: digsig.ByteRange = [0, 0, 0, 0] #:nodoc: sigref = Signature::Reference.new #:nodoc: sigref.Type = :SigRef #:nodoc: sigref.TransformMethod = :UR3 #:nodoc: sigref.Data = self.Catalog sigref.TransformParams = UsageRights::TransformParams.new sigref.TransformParams.P = true #:nodoc: sigref.TransformParams.Type = :TransformParams #:nodoc: sigref.TransformParams.V = UsageRights::TransformParams::VERSION rights.each do |right| sigref.TransformParams[right.first] ||= [] sigref.TransformParams[right.first].concat(right[1..-1]) end digsig.Reference = [ sigref ] self.Catalog.Perms ||= Perms.new self.Catalog.Perms.UR3 = digsig # # Flattening the PDF to get file view. # compile # # Creating an empty Xref table to compute signature byte range. 
# rebuild_dummy_xrefs sigoffset = get_object_offset(digsig.no, digsig.generation) + digsig.sigOffset digsig.ByteRange[0] = 0 digsig.ByteRange[1] = sigoffset digsig.ByteRange[2] = sigoffset + digsig.Contents.size digsig.ByteRange[3] = filesize - digsig.ByteRange[2] until digsig.ByteRange[3] == filesize - digsig.ByteRange[2] # From that point the file size remains constant # # Correct Xrefs variations caused by ByteRange modifications. # rebuildxrefs filedata = output() signable_data = filedata[digsig.ByteRange[0],digsig.ByteRange[1]] + filedata[digsig.ByteRange[2],digsig.ByteRange[3]] signature = OpenSSL::PKCS7.sign(certificate, key, signable_data, [], OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der digsig.Contents[0, signature.size] = signature # # No more modification are allowed after signing. # self.freeze end
Encrypts the current document with the provided passwords. The document will be encrypted at writing-on-disk time.
userpasswd |
The user password. |
ownerpasswd |
The owner password. |
options |
A set of options to configure encryption. |
# File lib/origami/encryption.rb, line 198
#
# Sets the document up for encryption with the Standard security handler.
#
# options:: Overrides for the defaults below. :cipher may be given as a
#           String or a Symbol, in any case ('rc4', :RC4, 'aes', :AES).
#           RC4 accepts key sizes from 40 to 128 bits in multiples of 8;
#           AES accepts 128 or 256 bits.
#
# Raises EncryptionError if the document is already encrypted or the key
# length is invalid, and EncryptionNotSupportedError for an unknown cipher.
#
# Returns self.
def encrypt(options = {})
  if self.is_encrypted?
    raise EncryptionError, "PDF is already encrypted"
  end

  #
  # Default encryption options.
  #
  params = {
    :user_passwd => '',
    :owner_passwd => '',
    :cipher => 'rc4',            # :RC4 or :AES
    :key_size => 128,            # Key size in bits
    :hardened => false,          # Use newer password validation (since Reader X)
    :encrypt_metadata => true,   # Metadata shall be encrypted?
    :permissions => Encryption::Standard::Permissions::ALL    # Document permissions
  }.update(options)

  userpasswd, ownerpasswd = params[:user_passwd], params[:owner_passwd]

  # to_s lets Symbol ciphers (:RC4, :aes) match the String branches below;
  # Symbol#upcase alone yields a Symbol that never equals 'RC4' or 'AES'.
  case params[:cipher].to_s.upcase
  when 'RC4'
    algorithm = Encryption::ARC4
    if (40..128) === params[:key_size] and params[:key_size] % 8 == 0
      if params[:key_size] > 40
        version = 2
        revision = 3
      else
        version = 1
        revision = 2
      end
    else
      raise EncryptionError, "Invalid RC4 key length"
    end
  when 'AES'
    algorithm = Encryption::AES
    if params[:key_size] == 128
      version = revision = 4
    elsif params[:key_size] == 256
      version = 5
      if params[:hardened]
        revision = 6
      else
        revision = 5
      end
    else
      raise EncryptionError, "Invalid AES key length (Only 128 and 256 bits keys are supported)"
    end
  else
    raise EncryptionNotSupportedError, "Cipher not supported : #{params[:cipher]}"
  end

  doc_id = (get_doc_attr(:ID) || gen_id).first

  handler = Encryption::Standard::Dictionary.new
  handler.Filter = :Standard #:nodoc:
  handler.V = version
  handler.R = revision
  handler.Length = params[:key_size]
  # NOTE(review): the :permissions option is not applied here; P is always -1.
  handler.P = -1 # params[:Permissions]

  # Revisions 4 and above describe the algorithm through a crypt filter.
  if revision >= 4
    handler.EncryptMetadata = params[:encrypt_metadata]
    handler.CF = Dictionary.new
    cryptfilter = Encryption::CryptFilterDictionary.new
    cryptfilter.AuthEvent = :DocOpen

    if revision == 4
      cryptfilter.CFM = :AESV2
    else
      cryptfilter.CFM = :AESV3
    end

    # Crypt filter length is expressed in bytes.
    cryptfilter.Length = params[:key_size] >> 3

    handler.CF[:StdCF] = cryptfilter
    handler.StmF = handler.StrF = :StdCF
  end

  handler.set_passwords(ownerpasswd, userpasswd, doc_id)
  encryption_key = handler.compute_user_encryption_key(userpasswd, doc_id)

  fileInfo = get_trailer_info
  fileInfo[:Encrypt] = self << handler

  self.extend(Encryption::EncryptedDocument)
  self.encryption_dict = handler
  self.encryption_key = encryption_key
  self.stm_algo = self.str_algo = algorithm

  self
end
Executes a JavaScript script in the current document context.
# File lib/origami/javascript.rb, line 679
#
# Runs +code+ through the document's JavaScript engine (see #js_engine)
# and returns the value of its #exec call.
def eval_js(code)
  engine = js_engine
  engine.exec(code)
end
Exports the document to a Graphviz dot file.
filename |
The path where to save the file. |
# File lib/origami/export.rb, line 34
#
# Writes the document's object graph to +filename+ in dot syntax.
#
# filename:: The path where to save the file.
#
# Each non-Reference object becomes a node keyed by its Ruby object_id;
# edges follow array elements and dictionary entries, with references
# solved first.
#
# Note: the nested +def+s below (re)define #appearance and #add_edges as
# regular methods every time this method runs.
def export_to_graph(filename)

  # Returns the node attributes (label, fontcolor, color, shape) for +object+.
  def appearance(object) #:nodoc:
    label = object.type.to_s
    case object
      when Catalog
        fontcolor = "red"
        color = "mistyrose"
        shape = "ellipse"
      when Name, Number
        label = object.value
        fontcolor = "brown"
        color = "lightgoldenrodyellow"
        shape = "polygon"
      when String
        # Only short, non-binary strings are shown as their own label.
        label = object.value unless (object.is_binary_data? or object.length > 50)
        fontcolor = "red"
        color = "white"
        shape = "polygon"
      when Array
        fontcolor = "darkgreen"
        color = "lightcyan"
        shape = "ellipse"
    else
      fontcolor = "blue"
      color = "aliceblue"
      shape = "ellipse"
    end

    { :label => label, :fontcolor => fontcolor, :color => color, :shape => shape }
  end

  # Writes to +fd+ the edges leaving +object+. The +pdf+ argument is not used.
  def add_edges(pdf, fd, object) #:nodoc:
    if object.is_a?(Array) or object.is_a?(ObjectStream)
      object.each { |subobj|
        subobj = subobj.solve if subobj.is_a?(Reference)
        fd << "\t#{object.object_id} -> #{subobj.object_id}\n" unless subobj.nil?
      }
    elsif object.is_a?(Dictionary)
      object.each_pair { |name, subobj|
        subobj = subobj.solve if subobj.is_a?(Reference)
        fd << "\t#{object.object_id} -> #{subobj.object_id} [label=\"#{name.value}\",fontsize=9];\n" unless subobj.nil?
      }
    end

    if object.is_a?(Stream)
      object.dictionary.each_pair { |key, value|
        value = value.solve if value.is_a?(Reference)
        fd << "\t#{object.object_id} -> #{value.object_id} [label=\"#{key.value}\",fontsize=9];\n" unless value.nil?
      }
    end
  end

  # graphname is a fresh local (nil) at this point, so it is always "PDF".
  graphname = "PDF" if graphname.nil? or graphname.empty?
  fd = File.open(filename, "w")

  begin
    fd << "digraph #{graphname} {\n\n"

    objects = self.objects(:include_keys => false).find_all{ |obj| not obj.is_a?(Reference) }

    objects.each { |object|
      attr = appearance(object)

      fd << "\t#{object.object_id} [label=\"#{attr[:label]}\",shape=#{attr[:shape]},color=#{attr[:color]},style=filled,fontcolor=#{attr[:fontcolor]},fontsize=16];\n"

      if object.is_a?(Stream)
        # NOTE(review): the block takes a single parameter; confirm what
        # dictionary#each yields here (value only, or a key/value pair).
        object.dictionary.each { |value|
          unless value.is_a?(Reference)
            attr = appearance(value)
            fd << "\t#{value.object_id} [label=\"#{attr[:label]}\",shape=#{attr[:shape]},color=#{attr[:color]},style=filled,fontcolor=#{attr[:fontcolor]},fontsize=16];\n"
          end
        }
      end

      add_edges(self, fd, object)
    }

    fd << "\n}"
  ensure
    # Always release the file handle, even if writing fails.
    fd.close
  end
end
Exports the document to a GraphML file.
filename |
The path where to save the file. |
# File lib/origami/export.rb, line 133
#
# Writes the document's object graph to +filename+ as GraphML, using the
# yWorks node/edge graphics keys declared in the file header.
#
# filename:: The path where to save the file.
#
# Note: the nested +def+s below (re)define #declare_node, #declare_edge,
# #appearance and #add_edges as regular methods every time this method runs.
def export_to_graphml(filename)

  # Returns the GraphML <node> element for +id+, labelled with attr[:label].
  def declare_node(id, attr) #:nodoc:
    " <node id=\"#{id}\">\n" <<
    " <data key=\"d0\">\n" <<
    " <y:ShapeNode>\n" <<
    " <y:NodeLabel>#{attr[:label]}</y:NodeLabel>\n" <<
    #~ " <y:Shape type=\"#{attr[:shape]}\"/>\n" <<
    " </y:ShapeNode>\n" <<
    " </data>\n" <<
    " </node>\n"
  end

  # Returns the GraphML <edge> element going from +src+ to +dest+.
  def declare_edge(id, src, dest, label = nil) #:nodoc:
    " <edge id=\"#{id}\" source=\"#{src}\" target=\"#{dest}\">\n" <<
    " <data key=\"d1\">\n" <<
    " <y:PolyLineEdge>\n" <<
    " <y:LineStyle type=\"line\" width=\"1.0\" color=\"#000000\"/>\n" <<
    " <y:Arrows source=\"none\" target=\"standard\"/>\n" <<
    " <y:EdgeLabel>#{label.to_s}</y:EdgeLabel>\n" <<
    " </y:PolyLineEdge>\n" <<
    " </data>\n" <<
    " </edge>\n"
  end

  # Returns the node attributes (label, fontcolor, color, shape) for +object+.
  def appearance(object) #:nodoc:
    label = object.type.to_s
    case object
      when Catalog
        fontcolor = "red"
        color = "mistyrose"
        shape = "doublecircle"
      when Name, Number
        label = object.value
        fontcolor = "orange"
        color = "lightgoldenrodyellow"
        shape = "polygon"
      when String
        # Only short, non-binary strings are shown as their own label.
        label = object.value unless (object.is_binary_data? or object.length > 50)
        fontcolor = "red"
        color = "white"
        shape = "polygon"
      when Array
        fontcolor = "green"
        color = "lightcyan"
        shape = "ellipse"
    else
      fontcolor = "blue"
      color = "aliceblue"
      shape = "ellipse"
    end

    { :label => label, :fontcolor => fontcolor, :color => color, :shape => shape }
  end

  # Writes to +fd+ the edges leaving +object+, numbering them from +id+.
  # Returns the next unused edge id. The +pdf+ argument is not used.
  def add_edges(pdf, fd, object, id) #:nodoc:
    if object.is_a?(Array) or object.is_a?(ObjectStream)
      object.each { |subobj|
        subobj = subobj.solve if subobj.is_a?(Reference)
        unless subobj.nil?
          fd << declare_edge("e#{id}", "n#{object.object_id}", "n#{subobj.object_id}")
          id = id + 1
        end
      }
    elsif object.is_a?(Dictionary)
      object.each_pair { |name, subobj|
        subobj = subobj.solve if subobj.is_a?(Reference)
        unless subobj.nil?
          fd << declare_edge("e#{id}", "n#{object.object_id}", "n#{subobj.object_id}", name.value)
          id = id + 1
        end
      }
    end

    if object.is_a?(Stream)
      object.dictionary.each_pair { |key, value|
        value = value.solve if value.is_a?(Reference)
        unless value.nil?
          fd << declare_edge("e#{id}", "n#{object.object_id}", "n#{value.object_id}", key.value)
          id = id + 1
        end
      }
    end

    id
  end

  # NOTE(review): this class variable is not read anywhere in this method;
  # the local edge_nb below is what actually numbers the edges.
  @@edge_nb = 1

  # graphname is a fresh local (nil) at this point, so it is always "PDF".
  graphname = "PDF" if graphname.nil? or graphname.empty?

  fd = File.open(filename, "w")

  edge_nb = 1
  begin
    fd << '<?xml version="1.0" encoding="UTF-8"?>' << "\n"
    fd << '<graphml xmlns="http://graphml.graphdrawing.org/xmlns/graphml"' << "\n"
    fd << ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' << "\n"
    fd << ' xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns/graphml ' << "\n"
    fd << ' http://www.yworks.com/xml/schema/graphml/1.0/ygraphml.xsd"' << "\n"
    fd << ' xmlns:y="http://www.yworks.com/xml/graphml">' << "\n"
    fd << '<key id="d0" for="node" yfiles.type="nodegraphics"/>' << "\n"
    fd << '<key id="d1" for="edge" yfiles.type="edgegraphics"/>' << "\n"
    fd << "<graph id=\"#{graphname}\" edgedefault=\"directed\">\n"

    objects = self.objects(:include_keys => false).find_all{ |obj| not obj.is_a?(Reference) }

    objects.each { |object|

      fd << declare_node("n#{object.object_id}", appearance(object))

      if object.is_a?(Stream)
        object.dictionary.each { |value|
          unless value.is_a?(Reference)
            # NOTE(review): this node id has no "n" prefix, while add_edges
            # targets "n#{value.object_id}"; confirm whether that mismatch
            # is intended.
            fd << declare_node(value.object_id, appearance(value))
          end
        }
      end

      edge_nb = add_edges(self, fd, object, edge_nb)
    }

    fd << '</graph>' << "\n"
    fd << '</graphml>'
  ensure
    # Always release the file handle, even if writing fails.
    fd.close
  end
end
Returns an array of Acroform fields.
# File lib/origami/acroform.rb, line 66
#
# Returns the solved entries of the AcroForm's :Fields array, or nil when
# the document has no form or the form has no :Fields key.
def fields
  return unless self.has_form?
  return unless self.Catalog.AcroForm.has_key?(:Fields)

  self.Catalog.AcroForm[:Fields].map { |entry| entry.solve }
end
Returns an array of objects matching specified block.
# File lib/origami/pdf.rb, line 376
#
# Returns the objects for which the block is true.
#
# params:: :only_indirect => true restricts the search to
#          #indirect_objects; otherwise every object from #objects is
#          considered.
def find(params = {}, &b)
  settings = { :only_indirect => false }
  settings.update(params)

  candidates =
    if settings[:only_indirect] == true
      self.indirect_objects
    else
      self.objects
    end

  candidates.find_all(&b)
end
Lookup destination in the destination name directory.
# File lib/origami/destinations.rb, line 33
#
# Resolves +name+ in the Names::Root::DESTS directory via #resolve_name.
def get_destination_by_name(name)
  resolve_name(Names::Root::DESTS, name)
end
Returns the document information dictionary if present.
# File lib/origami/metadata.rb, line 49
#
# Returns the :Info document attribute, as looked up by #get_doc_attr.
def get_document_info
  get_doc_attr(:Info)
end
Lookup embedded file in the embedded files name directory.
# File lib/origami/file.rb, line 90
#
# Resolves +name+ in the Names::Root::EMBEDDEDFILES directory via
# #resolve_name.
def get_embedded_file_by_name(name)
  resolve_name(Names::Root::EMBEDDEDFILES, name)
end
Returns the corresponding named Field.
# File lib/origami/acroform.rb, line 88
#
# Returns the first field whose solved :T entry equals +name+.
# When no field matches, the value of #each_field is returned instead.
def get_field(name)
  self.each_field { |candidate|
    return candidate if candidate[:T].solve == name
  }
end
Returns a Hash of the information found in the metadata stream
# File lib/origami/metadata.rb, line 65
#
# Returns a Hash of the information found in the catalog's metadata stream,
# or nil when the catalog's Metadata entry is not a Stream.
#
# For every rdf:Description element, attributes prefixed 'pdf' or 'xap' and
# all child elements are collected. A child element's value is the text of
# its first nested rdf:li when there is one, otherwise its own text.
def get_metadata
  stream = self.Catalog.Metadata
  return unless stream.is_a?(Stream)

  collected = {}
  document = REXML::Document.new(stream.data)

  document.elements.each('*/*/rdf:Description') do |description|
    description.attributes.each_attribute do |attribute|
      case attribute.prefix
      when 'pdf','xap'
        collected[attribute.name] = attribute.value
      end
    end

    description.elements.each('*') do |child|
      source = child.elements['.//rdf:li'] || child
      collected[child.name] = source.text.to_s
    end
  end

  collected
end
Get the n-th Page object.
# File lib/origami/page.rb, line 93
#
# Returns the n-th page, as given by the root page tree node's #get_page.
#
# Raises InvalidPDFError if the catalog has no valid page tree.
def get_page(n)
  unless self.Catalog and self.Catalog.Pages and self.Catalog.Pages.is_a?(PageTreeNode)
    raise InvalidPDFError, "Invalid page tree"
  end

  self.Catalog.Pages.get_page(n)
end
Lookup page in the page name directory.
# File lib/origami/page.rb, line 102
#
# Resolves +name+ in the Names::Root::PAGES directory via #resolve_name.
def get_page_by_name(name)
  resolve_name(Names::Root::PAGES, name)
end
Lookup script in the scripts name directory.
# File lib/origami/actions.rb, line 33
#
# Resolves +name+ in the Names::Root::JAVASCRIPT directory via
# #resolve_name.
def get_script_by_name(name)
  resolve_name(Names::Root::JAVASCRIPT, name)
end
Returns true if the document has a document information dictionary.
# File lib/origami/metadata.rb, line 35
#
# Tells whether the document has an :Info attribute, as reported by
# #has_attr?.
def has_document_info?
  has_attr?(:Info)
end
Returns true if the document contains an acrobat form.
# File lib/origami/acroform.rb, line 33
#
# Tells whether the catalog exists and has an :AcroForm key.
def has_form?
  !self.Catalog.nil? && self.Catalog.has_key?(:AcroForm)
end
Returns true if the document has a catalog metadata stream.
# File lib/origami/metadata.rb, line 42
#
# Tells whether the catalog's Metadata entry is a Stream.
def has_metadata?
  metadata = self.Catalog.Metadata
  metadata.is_a?(Stream)
end
# File lib/origami/signature.rb, line 377
#
# Tells whether the document carries usage rights, i.e. whether the
# catalog's Perms dictionary exists and holds a :UR3 or a :UR entry.
#
# The previous condition was inverted: it answered true when one of the
# two entries was *missing* and false when both were present.
def has_usage_rights?
  perms = self.Catalog.Perms

  !perms.nil? && (perms.has_key?(:UR3) || perms.has_key?(:UR))
end
Similar to PDF#insert or PDF#<<, but for an object belonging to another document. Object will be recursively copied and new version numbers will be assigned. Returns the new reference to the imported object.
object |
The object to import. |
# File lib/origami/pdf.rb, line 469
#
# Inserts the exported form of +object+ (the result of object.export) into
# this document and returns the value of #insert.
def import(object)
  exported = object.export
  self.insert(exported)
end
Return an array of indirect objects.
# File lib/origami/pdf.rb, line 438
#
# Returns a new array holding the objects of every revision, in revision
# order.
def indirect_objects
  collected = []
  @revisions.each { |revision| collected.concat(revision.objects) }
  collected
end
Inserts a page at position index into the document.
# File lib/origami/page.rb, line 61
#
# Inserts +page+ at position +index+ through the root page tree node.
# A page attached to another document is exported first.
#
# Raises InvalidPDFError if the catalog has no valid page tree.
# Returns self.
def insert_page(index, page)
  unless self.Catalog and self.Catalog.Pages and self.Catalog.Pages.is_a?(PageTreeNode)
    raise InvalidPDFError, "Invalid page tree"
  end

  # Page from another document must be exported.
  foreign = page.pdf and page.pdf != self
  page = page.export if foreign

  self.Catalog.Pages.insert_page(index, page)

  self
end
# File lib/origami/outputintents.rb, line 48
#
# Tells whether the document declares itself as PDF/A-1: the catalog has an
# OutputIntents array containing an intent whose S is
# OutputIntent::Intent::PDFA1, a metadata stream is present, and that
# metadata has an rdf:Description with pdfaid:conformance "A" and
# pdfaid:part "1".
def is_a_pdfa1?
  return false unless self.Catalog.OutputIntents.is_a?(Array)

  tagged = self.Catalog.OutputIntents.any? { |intent|
    intent.solve.S == OutputIntent::Intent::PDFA1
  }
  return false unless tagged
  return false unless self.has_metadata?

  xmp = REXML::Document.new(self.Catalog.Metadata.data)
  descriptions = REXML::XPath.match(xmp, "*/*/rdf:Description[@xmlns:pdfaid]")

  descriptions.any? { |desc|
    desc.elements["pdfaid:conformance"].text == "A" and
      desc.elements["pdfaid:part"].text == "1"
  }
end
Returns whether the PDF file is encrypted.
# File lib/origami/encryption.rb, line 51
#
# Tells whether the document has an :Encrypt attribute, as reported by
# #has_attr?.
def is_encrypted?
  has_attr?(:Encrypt)
end
Returns whether the current document is linearized.
# File lib/origami/linearization.rb, line 36
#
# Tells whether the document is linearized: the object of the first
# revision with the lowest file offset must be a Dictionary holding a
# :Linearized key. Any error raised while locating that object yields
# false.
def is_linearized?
  head =
    begin
      @revisions.first.objects.sort_by { |candidate| candidate.file_offset }.first
    rescue
      return false
    end

  head.is_a?(Dictionary) && head.has_key?(:Linearized)
end
Returns whether the document contains a digital signature.
# File lib/origami/signature.rb, line 273
#
# Tells whether the document contains a digital signature: the AcroForm
# must be a Dictionary whose SigFlags has the SIGNATURESEXIST bit set.
# An InvalidReferenceError raised during the lookup yields false.
def is_signed?
  self.Catalog.AcroForm.is_a?(Dictionary) &&
    self.Catalog.AcroForm.has_key?(:SigFlags) &&
    (self.Catalog.AcroForm.SigFlags & InteractiveForm::SigFlags::SIGNATURESEXIST) != 0
rescue InvalidReferenceError
  false
end
Returns the JavaScript engine (if JavaScript support is present).
# File lib/origami/javascript.rb, line 686
#
# Returns the document's JavaScript engine, creating and memoizing a
# PDF::JavaScript::Engine bound to this document on first use.
def js_engine
  @js_engine = @js_engine || PDF::JavaScript::Engine.new(self)
end
# File lib/origami/metadata.rb, line 56
#
# Returns the :Keywords field of the document information dictionary, as
# looked up by #get_document_info_field.
def keywords
  get_document_info_field(:Keywords)
end
Returns an array of Objects whose name (in a Dictionary) is matching pattern.
# File lib/origami/pdf.rb, line 331
#
# Returns the values stored under dictionary keys whose name matches one of
# +patterns+; values that are references are solved.
#
# patterns:: Regexps, or Strings (matched literally). With no pattern, all
#            objects are returned (keys excluded).
def ls(*patterns)
  return objects(:include_keys => false) if patterns.empty?

  matchers = patterns.map do |pattern|
    pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
  end

  found = []
  objects(:only_keys => true).each do |key|
    next unless matchers.any? { |matcher| key.value.to_s.match(matcher) }

    entry = key.parent[key]
    found << (entry.is_a?(Reference) ? entry.solve : entry)
  end

  found
end
Returns a Hash of all names under the specified root name directory. Returns an empty Hash if the directory does not exist.
# File lib/origami/catalog.rb, line 183
#
# Returns a Hash of all names found under the +root+ name directory.
#
# root:: The name directory handed to #get_names_root.
#
# Returns an empty Hash when the directory does not exist.
# Raises InvalidNameTreeError when the flattened name list does not consist
# of name/value pairs (odd number of elements).
def ls_names(root)
  namesroot = get_names_root(root)
  return {} if namesroot.nil?

  names = names_from_node(namesroot)

  # This must raise: `return Klass, "message"` would hand a two-element
  # Array back to the caller instead of signalling the error.
  if names.length % 2 != 0
    raise InvalidNameTreeError, "Odd number of elements"
  end

  Hash[*names]
end
Returns an array of Objects whose name (in a Dictionary) is matching pattern. Do not follow references.
# File lib/origami/pdf.rb, line 354
#
# Lists the values stored under dictionary keys whose name matches one of
# +patterns+, returning each value exactly as stored (references are not
# solved).
#
# patterns:: Strings are matched literally (they are escaped before being
#            turned into a Regexp); any other value is handed to
#            String#match unchanged.
#
# With no pattern, returns objects(:include_keys => false).
def ls_no_follow(*patterns)
  return objects(:include_keys => false) if patterns.empty?

  matchers = patterns.map do |pattern|
    pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
  end

  matching_keys = objects(:only_keys => true).select do |key|
    matchers.any? { |matcher| key.value.to_s.match(matcher) }
  end

  matching_keys.map { |key| key.parent[key] }
end
# File lib/origami/metadata.rb, line 60
#
# Returns the :ModDate field as reported by get_document_info_field.
def mod_date
  get_document_info_field(:ModDate)
end
Returns an array of objects embedded in the PDF body.
:include_objectstreams |
Whether it shall return objects embedded in object streams. |
Note: this should return an iterator for Ruby 1.9 compatibility.
# File lib/origami/pdf.rb, line 395
#
# Returns a flat array of the objects found in every revision, walking into
# dictionaries, arrays and (optionally) object streams.
#
# params:: Options merged over the defaults:
#          :include_objectstreams (default true)  descend into ObjectStream
#          :include_keys          (default true)  also collect dictionary keys
#          :only_keys             (default false) collect dictionary keys only;
#                                                 forces :include_keys on
def objects(params = {})
  # Recursive collector. Being a nested def, it is (re)defined as a regular
  # method each time #objects runs.
  def append_subobj(root, objset, opts)
    # Identity check: skip anything already collected.
    return unless objset.find { |known| root.equal?(known) }.nil?

    objset << root unless opts[:only_keys]

    if root.is_a?(Dictionary)
      root.each_pair do |name, value|
        objset << name if opts[:only_keys]

        append_subobj(name, objset, opts) if opts[:include_keys] && !opts[:only_keys]
        append_subobj(value, objset, opts)
      end
    elsif root.is_a?(Array) || (root.is_a?(ObjectStream) && opts[:include_objectstreams])
      root.each { |child| append_subobj(child, objset, opts) }
    end
  end

  options = {
    :include_objectstreams => true,
    :include_keys => true,
    :only_keys => false
  }.merge(params)
  options[:include_keys] |= options[:only_keys]

  collected = []
  @revisions.each do |revision|
    revision.objects.each { |object| append_subobj(object, collected, options) }
  end
  collected
end
Sets an action to run on document closing.
action |
# File lib/origami/catalog.rb, line 103
#
# Registers +action+ as the Catalog additional action WC, creating the AA
# dictionary if it is missing.
#
# action:: An Action::JavaScript or a Reference.
#
# Raises TypeError for any other kind of +action+ and InvalidPDFError when
# the Catalog is falsy. Returns self.
def onDocumentClose(action)
  acceptable = action.is_a?(Action::JavaScript) || action.is_a?(Reference)
  raise TypeError, "An Action::JavaScript object must be passed." unless acceptable

  raise InvalidPDFError, "A catalog object must exist to add this action." unless self.Catalog

  self.Catalog.AA ||= CatalogAdditionalActions.new
  self.Catalog.AA.WC = action

  self
end
Sets an action to run on document opening.
action |
# File lib/origami/catalog.rb, line 84
#
# Stores +action+ as the Catalog's OpenAction.
#
# action:: An Action, a Destination or a Reference.
#
# Raises TypeError for any other kind of +action+ and InvalidPDFError when
# the Catalog is falsy. Returns self.
def onDocumentOpen(action)
  acceptable = action.is_a?(Action) || action.is_a?(Destination) || action.is_a?(Reference)
  raise TypeError, "An Action object must be passed." unless acceptable

  raise InvalidPDFError, "A catalog object must exist to add this action." unless self.Catalog

  self.Catalog.OpenAction = action

  self
end
Sets an action to run on document printing.
action |
# File lib/origami/catalog.rb, line 123
#
# Registers +action+ as the Catalog additional action WP, creating the AA
# dictionary if it is missing.
#
# action:: An Action::JavaScript or a Reference.
#
# Raises TypeError for any other kind of +action+ and InvalidPDFError when
# the Catalog is falsy.
#
# Returns self, like onDocumentOpen and onDocumentClose, so the three
# setters can be chained the same way (the method previously fell through
# and returned the assigned action).
def onDocumentPrint(action)
  unless action.is_a?(Action::JavaScript) or action.is_a?(Reference)
    raise TypeError, "An Action::JavaScript object must be passed."
  end

  unless self.Catalog
    raise InvalidPDFError, "A catalog object must exist to add this action."
  end

  self.Catalog.AA ||= CatalogAdditionalActions.new
  self.Catalog.AA.WP = action

  self
end
Original data parsed to create this document, nil if created from scratch.
# File lib/origami/pdf.rb, line 184
#
# Returns the parser's target_data, or nil when the document has no parser.
def original_data
  return nil unless @parser

  @parser.target_data
end
Original file name if parsed from disk, nil otherwise.
# File lib/origami/pdf.rb, line 170
#
# Returns the parser's target_filename, or nil when the document has no
# parser.
def original_filename
  return nil unless @parser

  @parser.target_filename
end
Original file size if parsed from a data stream, nil otherwise.
# File lib/origami/pdf.rb, line 177
#
# Returns the parser's target_filesize, or nil when the document has no
# parser.
def original_filesize
  return nil unless @parser

  @parser.target_filesize
end
Returns an array of Page
# File lib/origami/page.rb, line 74
#
# Returns the children of the Catalog's page tree root.
#
# Raises InvalidPDFError when the Catalog or its Pages entry is falsy, or
# when Pages is not a PageTreeNode.
def pages
  page_tree_ok = self.Catalog && self.Catalog.Pages && self.Catalog.Pages.is_a?(PageTreeNode)
  raise InvalidPDFError, "Invalid page tree" unless page_tree_ok

  self.Catalog.Pages.children
end
# File lib/origami/metadata.rb, line 58
#
# Returns the :Producer field as reported by get_document_info_field.
def producer
  get_document_info_field(:Producer)
end
Registers an object into a specific Names root dictionary.
root |
The root dictionary (see Names::Root) |
name |
The value name. |
value |
The value to associate with this name. |
# File lib/origami/catalog.rb, line 144
#
# Appends a name/value pair to the :Names array of the given names root,
# creating the Catalog's Names dictionary and the root node when missing.
#
# root::  Key of the root inside the Catalog's Names dictionary.
# name::  The value name.
# value:: The value to associate with +name+; it is made indirect unless
#         it is a Reference.
def register(root, name, value)
  self.Catalog.Names ||= Names.new
  value.set_indirect(true) unless value.is_a?(Reference)

  existing_node = self.Catalog.Names[root]
  unless existing_node.nil?
    return existing_node.solve[:Names] << name << value
  end

  # No node yet for this root: create an indirect one and attach it first.
  fresh_node = NameTreeNode.new(:Names => []).set_indirect(true)
  self.Catalog.Names[root] = fresh_node
  fresh_node.Names << name << value
end
Removes a whole document revision.
index |
Revision index, first is 0. |
# File lib/origami/pdf.rb, line 510
#
# Removes a whole document revision.
#
# index:: Revision index, first is 0. Must satisfy 0 <= index < number of
#         revisions.
#
# Raises IndexError when +index+ is out of range and InvalidPDFError when
# the document only has one revision left. Returns self.
def remove_revision(index)
  # Valid indices stop at size - 1. The previous `index > size` test let
  # index == size through, and delete_at then silently removed nothing.
  if index < 0 or index >= @revisions.size
    raise IndexError, "Not a valid revision index"
  end

  if @revisions.size == 1
    raise InvalidPDFError, "Cannot remove last revision"
  end

  @revisions.delete_at(index)
  self
end
Tries to strip any xrefs information off the document.
# File lib/origami/xreftable.rb, line 33
#
# Tries to strip cross-reference information from every revision, newest
# first: cross-reference streams (following their Prev chain) are deleted
# and each revision's xrefstm and xreftable are reset to nil.
def remove_xrefs
  # Deletes +xrefstm+, then the stream located at its Prev offset when that
  # offset is an Integer pointing at an XRefStream. Being a nested def, it
  # is (re)defined as a regular method each time #remove_xrefs runs.
  def delete_xrefstm(xrefstm)
    previous_offset = xrefstm.Prev
    delete_object(xrefstm.reference)

    if previous_offset.is_a?(Integer)
      previous_stream = get_object_by_offset(previous_offset)
      delete_xrefstm(previous_stream) if previous_stream.is_a?(XRefStream)
    end
  end

  @revisions.reverse_each do |revision|
    delete_xrefstm(revision.xrefstm) if revision.has_xrefstm?

    # A trailer may also point at a cross-reference stream through XRefStm.
    if revision.trailer.has_dictionary? && revision.trailer.XRefStm.is_a?(Integer)
      pointed_stream = get_object_by_offset(revision.trailer.XRefStm)
      delete_xrefstm(pointed_stream) if pointed_stream.is_a?(XRefStream)
    end

    revision.xrefstm = revision.xreftable = nil
  end
end
Retrieve the corresponding value associated with name in the specified root name directory, or nil if the value does not exist.
# File lib/origami/catalog.rb, line 172
#
# Looks +name+ up in the given root name directory.
#
# Returns nil when get_names_root yields nil for +root+; otherwise returns
# whatever resolve_name_from_node finds.
def resolve_name(root, name)
  names_node = get_names_root(root)

  names_node.nil? ? nil : resolve_name_from_node(names_node, name)
end
Saves the current document.
path |
The path where to save this PDF, or an object responding to write. |
# File lib/origami/pdf.rb, line 207
#
# Saves the current document.
#
# path::   Either a filesystem path (expanded, then opened for binary
#          writing) or any object responding to +write+, which is used
#          as the output as-is.
# params:: Options merged over the defaults :delinearize => true,
#          :recompile => true, :decrypt => false. The merged hash is also
#          handed to compile and output. An :intent value matching
#          /pdf[\/-]?A1?/ triggers intents_as_pdfa1.
#
# On a frozen document the :recompile, :rebuildxrefs, :noindent and
# :obfuscate options are forced to false.
#
# The output is closed once written. If an error is raised before the
# write completes, a file opened by this method is still closed (it used
# to be leaked), while a caller-supplied object is left open, as before.
#
# Returns self.
def save(path, params = {})
  options =
  {
    :delinearize => true,
    :recompile => true,
    :decrypt => false
  }
  options.update(params)

  if self.frozen? # incompatible flags with frozen doc (signed)
    options[:recompile] =
    options[:rebuildxrefs] =
    options[:noindent] =
    options[:obfuscate] = false
  end

  opened_here = !path.respond_to?(:write)
  if opened_here
    path = File.expand_path(path)
    fd = File.open(path, 'w').binmode
  else
    fd = path
  end

  written = false
  begin
    intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/
    self.delinearize! if options[:delinearize] and self.is_linearized?
    compile(options) if options[:recompile]

    fd.write output(options)
    written = true
  ensure
    # Never leak a handle we opened ourselves; only close the caller's
    # object after a successful write.
    fd.close if written or opened_here
  end

  self
end
Saves the file up to given revision number. This can be useful to visualize the modifications over different incremental updates.
revision |
The revision number to save. |
filename |
The path where to save this PDF. |
# File lib/origami/pdf.rb, line 248
#
# Saves the document through #save, passing +revision+ as the
# :up_to_revision option.
#
# revision:: The revision number to save up to.
# filename:: The path where to save this PDF.
def save_upto(revision, filename)
  save_options = { :up_to_revision => revision }

  save(filename, save_options)
end
Serializes the current PDF.
# File lib/origami/pdf.rb, line 191
#
# Serializes the current PDF: the object is dumped with Marshal and written
# gzip-compressed to +filename+.
#
# filename:: Destination handed to Zlib::GzipWriter.open.
#
# The parser is detached for the duration of the dump so it is not part of
# the serialized data, and is always re-attached afterwards, even when the
# dump or the write raises (it used to be lost in that case).
#
# Returns self.
def serialize(filename)
  parser = @parser
  @parser = nil # do not serialize the parser

  begin
    Zlib::GzipWriter.open(filename) { |gz|
      gz.write Marshal.dump(self)
    }
  ensure
    @parser = parser
  end

  self
end
Sets PDF extension level and version. Only supported values are "1.7" and 3.
# File lib/origami/catalog.rb, line 33
#
# Installs a fresh DeveloperExtension under the :ADBE key of the Catalog's
# Extensions dictionary (created when missing).
#
# version:: Wrapped in a Name and stored as BaseVersion.
# level::   Stored as ExtensionLevel.
#
# Returns self.
def set_extension_level(version, level)
  extensions = (self.Catalog.Extensions ||= Extensions.new)

  # Any previous :ADBE entry is replaced before the fields are filled in.
  extensions[:ADBE] = DeveloperExtension.new
  extensions[:ADBE].BaseVersion = Name.new(version)
  extensions[:ADBE].ExtensionLevel = level

  self
end
Sign the document with the given key and x509 certificate.
certificate |
The X509 certificate containing the public key. |
key |
The private key associated with the certificate. |
ca |
Optional CA certificates used to sign the user certificate. |
# File lib/origami/signature.rb, line 108
#
# Signs the document with the given key and X509 certificate, then freezes
# the document.
#
# certificate:: An OpenSSL::X509::Certificate.
# key::         An OpenSSL::PKey::RSA private key.
# options::     Merged over the defaults:
#               :method     signature sub-filter, "adbe.pkcs7.detached" by default
#               :ca         Array of CA certificates (default [])
#               :annotation an Annotation::Widget::Signature, or nil to create one
#               :location, :contact, :reason  optional strings stored in the
#                                             signature dictionary
#
# Raises a RuntimeError when Origami::OPTIONS[:use_openssl] is falsy,
# TypeError on badly typed arguments and NotImplementedError for an
# unsupported :method.
#
# Note: the 'adbe.x509.rsa_sha1' branch raises NotImplementedError right
# after building its size estimator, so that method is currently rejected
# and the rsa_sha1 handling further down is never reached.
def sign(certificate, key, options = {})
  unless Origami::OPTIONS[:use_openssl]
    fail "OpenSSL is not present or has been disabled."
  end

  params =
  {
    :method => "adbe.pkcs7.detached",
    :ca => [],
    :annotation => nil,
    :location => nil,
    :contact => nil,
    :reason => nil
  }.update(options)

  unless certificate.is_a?(OpenSSL::X509::Certificate)
    raise TypeError, "A OpenSSL::X509::Certificate object must be passed."
  end

  unless key.is_a?(OpenSSL::PKey::RSA)
    raise TypeError, "A OpenSSL::PKey::RSA object must be passed."
  end

  ca = params[:ca]
  unless ca.is_a?(::Array)
    raise TypeError, "Expected an Array of CA certificate."
  end

  annotation = params[:annotation]
  unless annotation.nil? or annotation.is_a?(Annotation::Widget::Signature)
    raise TypeError, "Expected a Annotation::Widget::Signature object."
  end

  # Pick a lambda estimating the size of the signature field: it signs a
  # fixed sample string with the real credentials and adds 128 to the result.
  # NOTE(review): the extra 128 presumably leaves slack for size variations
  # of the final signature -- confirm.
  case params[:method]
    when 'adbe.pkcs7.detached'
      signfield_size = lambda{|crt,key,ca|
        datatest = "abcdefghijklmnopqrstuvwxyz"
        OpenSSL::PKCS7.sign(
          crt,
          key,
          datatest,
          ca,
          OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY
        ).to_der.size + 128
      }
    when 'adbe.pkcs7.sha1'
      signfield_size = lambda{|crt,key,ca|
        datatest = "abcdefghijklmnopqrstuvwxyz"
        OpenSSL::PKCS7.sign(
          crt,
          key,
          Digest::SHA1.digest(datatest),
          ca,
          OpenSSL::PKCS7::BINARY
        ).to_der.size + 128
      }

    when 'adbe.x509.rsa_sha1'
      signfield_size = lambda{|crt,key,ca|
        datatest = "abcdefghijklmnopqrstuvwxyz"
        key.private_encrypt(
          Digest::SHA1.digest(datatest)
        ).size + 128
      }
      # Unconditional: this method is rejected even though an estimator exists.
      raise NotImplementedError, "Unsupported method #{params[:method].inspect}"

    else
      raise NotImplementedError, "Unsupported method #{params[:method].inspect}"
  end

  digsig = Signature::DigitalSignature.new.set_indirect(true)

  # Without a caller-supplied widget, use a signature annotation whose
  # rectangle has all coordinates set to 0.0.
  if annotation.nil?
    annotation = Annotation::Widget::Signature.new
    annotation.Rect = Rectangle[:llx => 0.0, :lly => 0.0, :urx => 0.0, :ury => 0.0]
  end

  annotation.V = digsig
  add_fields(annotation)
  self.Catalog.AcroForm.SigFlags =
    InteractiveForm::SigFlags::SIGNATURESEXIST | InteractiveForm::SigFlags::APPENDONLY

  # Contents is a zero-filled placeholder of the estimated size and ByteRange
  # a dummy value; both are overwritten further down.
  digsig.Type = :Sig #:nodoc:
  digsig.Contents = HexaString.new("\x00" * signfield_size[certificate, key, ca]) #:nodoc:
  digsig.Filter = Name.new("Adobe.PPKMS") #:nodoc:
  digsig.SubFilter = Name.new(params[:method]) #:nodoc:
  digsig.ByteRange = [0, 0, 0, 0] #:nodoc:

  digsig.Location = HexaString.new(params[:location]) if params[:location]
  digsig.ContactInfo = HexaString.new(params[:contact]) if params[:contact]
  digsig.Reason = HexaString.new(params[:reason]) if params[:reason]

  # Unreachable at present: the rsa_sha1 method raised in the case above.
  if params[:method] == 'adbe.x509.rsa_sha1'
    digsig.Cert =
      if ca.empty?
        HexaString.new(certificate.to_der)
      else
        [ HexaString.new(certificate.to_der) ] + ca.map{ |crt| HexaString.new(crt.to_der) }
      end
  end

  #
  # Flattening the PDF to get file view.
  #
  compile

  #
  # Creating an empty Xref table to compute signature byte range.
  #
  rebuild_dummy_xrefs

  sigoffset = get_object_offset(digsig.no, digsig.generation) + digsig.sigOffset

  # ByteRange is [start1, length1, start2, length2]: everything before the
  # signature contents, then everything after them.
  digsig.ByteRange[0] = 0
  digsig.ByteRange[1] = sigoffset
  digsig.ByteRange[2] = sigoffset + digsig.Contents.size

  # Repeat until the stored length agrees with filesize, which is evaluated
  # again on every pass.
  digsig.ByteRange[3] = filesize - digsig.ByteRange[2] until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]

  # From that point the file size remains constant

  #
  # Correct Xrefs variations caused by ByteRange modifications.
  #
  rebuildxrefs

  filedata = output()
  signable_data = filedata[digsig.ByteRange[0],digsig.ByteRange[1]] + filedata[digsig.ByteRange[2],digsig.ByteRange[3]]

  signature =
    case params[:method]
      when 'adbe.pkcs7.detached'
        OpenSSL::PKCS7.sign(
          certificate,
          key,
          signable_data,
          ca,
          OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY
        ).to_der

      when 'adbe.pkcs7.sha1'
        OpenSSL::PKCS7.sign(
          certificate,
          key,
          Digest::SHA1.digest(signable_data),
          ca,
          OpenSSL::PKCS7::BINARY
        ).to_der

      when 'adbe.x509.rsa_sha1'
        key.private_encrypt(Digest::SHA1.digest(signable_data))
    end

  # Write the signature over the start of the placeholder.
  digsig.Contents[0, signature.size] = signature

  #
  # No more modification are allowed after signing.
  #
  self.freeze
end
# File lib/origami/signature.rb, line 381
#
# Returns the V entry of the first field whose FT is :Sig and whose V is a
# Dictionary.
#
# Raises SignatureError when the document is not signed, or when no such
# field is found.
def signature
  unless self.is_signed?
    raise SignatureError, "Not a signed document"
  end

  self.each_field do |candidate|
    return candidate.V if candidate.FT == :Sig && candidate.V.is_a?(Dictionary)
  end

  raise SignatureError, "Cannot find digital signature"
end
# File lib/origami/metadata.rb, line 55
#
# Returns the :Subject field as reported by get_document_info_field.
def subject
  get_document_info_field(:Subject)
end
# File lib/origami/metadata.rb, line 53
#
# Returns the :Title field as reported by get_document_info_field.
def title
  get_document_info_field(:Title)
end
Verify a document signature.
Options: _:trusted_: an array of trusted X509 certificates. If no argument is passed, embedded certificates are treated as trusted.
# File lib/origami/signature.rb, line 47 def verify(options = {}) unless Origami::OPTIONS[:use_openssl] fail "OpenSSL is not present or has been disabled." end params = { :trusted => [] }.update(options) digsig = self.signature unless digsig[:Contents].is_a?(String) raise SignatureError, "Invalid digital signature contents" end store = OpenSSL::X509::Store.new params[:trusted].each do |ca| store.add_cert(ca) end flags = 0 flags |= OpenSSL::PKCS7::NOVERIFY if params[:trusted].empty? stream = StringScanner.new(self.original_data) stream.pos = digsig[:Contents].file_offset Object.typeof(stream).parse(stream) endofsig_offset = stream.pos stream.terminate s1,l1,s2,l2 = digsig.ByteRange if s1.value != 0 or (s2.value + l2.value) != self.original_data.size or (s1.value + l1.value) != digsig[:Contents].file_offset or s2.value != endofsig_offset raise SignatureError, "Invalid signature byte range" end data = self.original_data[s1,l1] + self.original_data[s2,l2] case digsig.SubFilter.value.to_s when 'adbe.pkcs7.detached' flags |= OpenSSL::PKCS7::DETACHED p7 = OpenSSL::PKCS7.new(digsig[:Contents].value) raise SignatureError, "Not a PKCS7 detached signature" unless p7.detached? p7.verify([], store, data, flags) when 'adbe.pkcs7.sha1' p7 = OpenSSL::PKCS7.new(digsig[:Contents].value) p7.verify([], store, nil, flags) and p7.data == Digest::SHA1.digest(data) else raise NotImplementedError, "Unsupported method #{digsig.SubFilter}" end end
Generated with the Darkfish Rdoc Generator 2.