ruby-ole-1.2.11.8/ 0000755 0000041 0000041 00000000000 12461443715 013537 5 ustar www-data www-data ruby-ole-1.2.11.8/Rakefile 0000644 0000041 0000041 00000003607 12461443715 015212 0 ustar www-data www-data require 'rubygems'
require 'rake/testtask'
require 'rbconfig'
require 'fileutils'
# load the gemspec so the rake tasks below can reuse its metadata
spec = eval File.read('ruby-ole.gemspec')

task :default => [:test]

# standard `rake test' task over all test/test_*.rb files
Rake::TestTask.new do |t|
  t.test_files = FileList["test/test_*.rb"]
  t.warning = true
  t.verbose = true
end
begin
  # coverage variant of the test task. note that simplecov itself is only
  # required in the spawned test process (via ruby_opts), so the rescue
  # below guards the task *definition* only.
  Rake::TestTask.new(:coverage) do |t|
    t.test_files = FileList["test/test_*.rb"]
    t.warning = true
    t.verbose = true
    t.ruby_opts = ['-rsimplecov -e "SimpleCov.start; load(ARGV.shift)"']
  end
rescue LoadError
  # SimpleCov not available
end
begin
  require 'rdoc/task'
  # generate API docs into doc/ with `rake rdoc'
  RDoc::Task.new do |t|
    t.rdoc_dir = 'doc'
    t.rdoc_files.include 'lib/**/*.rb'
    t.rdoc_files.include 'README', 'ChangeLog'
    # PKG_NAME is not defined anywhere in this Rakefile - referencing it
    # raised NameError (which the LoadError rescue below does not catch).
    # use the name from the gemspec loaded at the top instead.
    t.title = "#{spec.name} documentation"
    t.options += %w[--line-numbers --inline-source --tab-width 2]
    t.main = 'README'
  end
rescue LoadError
  # RDoc not available or too old (<2.4.2)
end
begin
  require 'rubygems/package_task'
  # `rake package' builds the gem (and a tarball) into build/
  Gem::PackageTask.new spec do |pkg|
    pkg.need_tar = true
    pkg.need_zip = false
    pkg.package_dir = 'build'
  end
rescue LoadError
  # RubyGems too old (<1.3.2)
end
desc 'Run various benchmarks'
task :benchmark do
  require 'benchmark'
  require 'tempfile'
  require 'ole/storage'
  # should probably add some read benchmarks too

  # writes opts[:files] streams of opts[:size] bytes each into a fresh
  # ole container backed by a tempfile. data goes out in :block_size
  # chunks plus one final partial chunk. (NOTE: defined inside the task,
  # so this becomes a method on Object when the task runs.)
  def write_benchmark opts={}
    files, size = opts[:files], opts[:size]
    block_size = opts[:block_size] || 100_000
    block = 0.chr * block_size
    blocks, remaining = size.divmod block_size
    remaining = 0.chr * remaining
    Tempfile.open 'ole_storage_benchmark' do |temp|
      Ole::Storage.open temp do |ole|
        files.times do |i|
          ole.file.open "file_#{i}", 'w' do |f|
            blocks.times { f.write block }
            f.write remaining
          end
        end
      end
    end
  end

  Benchmark.bm do |bm|
    # same total bytes per iteration: 1 file of 1MB vs 1000 files of 1KB
    bm.report 'write_1mb_1x5' do
      5.times { write_benchmark :files => 1, :size => 1_000_000 }
    end
    bm.report 'write_1mb_2x5' do
      5.times { write_benchmark :files => 1_000, :size => 1_000 }
    end
  end
end
ruby-ole-1.2.11.8/bin/ 0000755 0000041 0000041 00000000000 12461443715 014307 5 ustar www-data www-data ruby-ole-1.2.11.8/bin/oletool 0000755 0000041 0000041 00000002400 12461443715 015706 0 ustar www-data www-data #! /usr/bin/ruby
require 'optparse'
require 'yaml'
require 'ole/storage'
# command line entry point: parse the options, then apply the selected
# action (tree dump, repack, mimetype guess, or metadata dump) to each
# file named on the command line.
def oletool
  opts = {:verbose => false, :action => :tree}
  op = OptionParser.new do |op|
    op.banner = "Usage: oletool [options] [files]"
    op.separator ''
    op.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree }
    op.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack }
    op.on('-m', '--mimetype', 'Print the guessed mime types') { opts[:action] = :mimetype }
    op.on('-y', '--metadata', 'Dump the internal meta data as YAML') { opts[:action] = :metadata }
    op.separator ''
    op.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v }
    op.on_tail('-h', '--help', 'Show this message') { puts op; exit }
  end
  files = op.parse ARGV
  if files.empty?
    puts 'Must specify 1 or more msg files.'
    puts op
    exit 1
  end
  # quieten the library logger unless -v was given
  Ole::Log.level = opts[:verbose] ? Logger::WARN : Logger::FATAL
  files.each do |file|
    case opts[:action]
    when :tree
      Ole::Storage.open(file) { |ole| puts ole.root.to_tree }
    when :repack
      Ole::Storage.open file, 'rb+', &:repack
    when :metadata
      # Kernel#y is only defined inside irb on modern rubies, so calling
      # it here raised NoMethodError. dump explicitly via #to_yaml
      # instead ('yaml' is required at the top of this script).
      Ole::Storage.open(file) { |ole| puts ole.meta_data.to_h.to_yaml }
    when :mimetype
      puts Ole::Storage.open(file) { |ole| ole.meta_data.mime_type }
    end
  end
end

oletool
ruby-ole-1.2.11.8/README 0000644 0000041 0000041 00000010554 12461443715 014424 0 ustar www-data www-data = Introduction
The ruby-ole library provides a variety of functions primarily for
working with OLE2 structured storage files, such as those produced by
Microsoft Office - eg *.doc, *.msg etc.
= Example Usage
Here are some examples of how to use the library functionality,
categorised roughly by purpose.
1. Reading and writing files within an OLE container
The recommended way to manipulate the contents is via the
"file_system" API, whereby you use Ole::Storage instance methods
similar to the regular File and Dir class methods.
ole = Ole::Storage.open('oleWithDirs.ole', 'rb+')
p ole.dir.entries('.') # => [".", "..", "dir1", "dir2", "file1"]
p ole.file.read('file1')[0, 25] # => "this is the entry 'file1'"
ole.dir.mkdir('newdir')
2. Accessing OLE meta data
Some convenience functions are provided for (currently read only)
access to OLE property sets and other sources of meta data.
ole = Ole::Storage.open('test_word_95.doc')
p ole.meta_data.file_format # => "MSWordDoc"
p ole.meta_data.mime_type # => "application/msword"
p ole.meta_data.doc_author.split.first # => "Charles"
3. Raw access to underlying OLE internals
This is probably of little interest to most developers using the
library, but for some use cases you may need to drop down to the
lower level API on which the "file_system" API is constructed,
which exposes more of the format details.
Ole::Storage files can have multiple files with the same name,
or with a slash in the name, and other things that are probably
strictly invalid. This API is the only way to access those files.
You can access the header object directly:
p ole.header.num_sbat # => 1
p ole.header.magic.unpack('H*') # => ["d0cf11e0a1b11ae1"]
You can directly access the array of all Dirent objects,
including the root:
p ole.dirents.length # => 5
puts ole.root.to_tree
# =>
- #&lt;Dirent:"Root Entry" ...&gt;
|- #&lt;Dirent:"\001CompObj" ...&gt;
|- #&lt;Dirent:"dir1" ...&gt;
|- #&lt;Dirent:"dir2" ...&gt;
\- #&lt;Dirent:"file1" ...&gt;
You can access (through RangesIO methods, or by using the
relevant Dirent and AllocationTable methods) information like where within
the container a stream is located (these are offset/length pairs):
p ole.root["\001CompObj"].open { |io| io.ranges } # => [[0, 64], [64, 34]]
See the documentation for each class for more details.
= Thanks
* The code contained in this project was initially based on chicago's libole
(source available at http://prdownloads.sf.net/chicago/ole.tgz).
* It was later augmented with some corrections by inspecting pole, and (purely
for header definitions) gsf.
* The property set parsing code came from the apache java project POIFS.
* The excellent idea for using a pseudo file system style interface by providing
#file and #dir methods which mimic File and Dir, was borrowed (along with almost
unchanged tests!) from Thomas Sondergaard's rubyzip.
= TODO
== 1.2.12
* internal api cleanup
* add buffering to rangesio so that performance for small reads and writes
isn't so awful. maybe try and remove the bottlenecks of unbuffered first
with more profiling, then implement the buffering on top of that.
* fix mode strings - like truncate when using 'w+', supporting append
'a+' modes etc. done?
* make ranges io obey readable vs writeable modes.
* more RangesIO completion. ie, doesn't support #<< at the moment.
* maybe some oletool doc.
* make sure `rake test' runs tests both with $KCODE='UTF8', and without,
and maybe ensure i don't regress on 1.9 and jruby either now that they're
fixed.
== 1.3.1
* case insensitive open mode would be nice
* fix property sets a bit more. see TODO in Ole::Storage::MetaData
* ability to zero out padding and unused blocks
* better tests for mbat support.
* further doc cleanup
* add in place testing for jruby and ruby1.9
== Longer term
* more benchmarking, profiling, and speed fixes. was thinking vs other
ruby filesystems (eg, vs File/Dir itself, and vs rubyzip), and vs other
ole implementations (maybe perl's, and poifs) just to check it's in the
ballpark, with no remaining silly bottlenecks.
* supposedly vba does something weird to ole files. test that.
ruby-ole-1.2.11.8/lib/ 0000755 0000041 0000041 00000000000 12461443715 014305 5 ustar www-data www-data ruby-ole-1.2.11.8/lib/ole/ 0000755 0000041 0000041 00000000000 12461443715 015064 5 ustar www-data www-data ruby-ole-1.2.11.8/lib/ole/storage/ 0000755 0000041 0000041 00000000000 12461443715 016530 5 ustar www-data www-data ruby-ole-1.2.11.8/lib/ole/storage/version.rb 0000644 0000041 0000041 00000000135 12461443715 020541 0 ustar www-data www-data # encoding: ASCII-8BIT
module Ole # :nodoc:
  class Storage
    # the version of the ruby-ole library, as released in the gem
    VERSION = '1.2.11.8'
  end
end
ruby-ole-1.2.11.8/lib/ole/storage/file_system.rb 0000644 0000041 0000041 00000024234 12461443715 021405 0 ustar www-data www-data # encoding: ASCII-8BIT
#
# = Introduction
#
# This file intends to provide file system-like api support, a la zip/zipfilesystem.
#
# = TODO
#
# - need to implement some more IO functions on RangesIO, like #puts, #print
# etc, like AbstractOutputStream from zipfile.
#
# - check Dir.mkdir, and File.open, and File.rename, to add in filename
# length checks (max 32 / 31 or something).
# do the automatic truncation, and add in any necessary warnings.
#
# - File.split('a/') == File.split('a') == ['.', 'a']
# the implication of this, is that things that try to force directory
# don't work. like, File.rename('a', 'b'), should work if a is a file
# or directory, but File.rename('a/', 'b') should only work if a is
# a directory. tricky, need to clean things up a bit more.
# i think a general path name => dirent method would work, with flags
# about what should raise an error.
#
# - Need to look at streamlining things after getting all the tests passing,
# as this file's getting pretty long - almost half the real implementation.
# and is probably more inefficient than necessary.
# too many exceptions in the expected path of certain functions.
#
# - should look at profiles before and after switching ruby-msg to use
# the filesystem api.
#
module Ole # :nodoc:
class Storage
# lazily-created File-like facade for this storage (see FileClass)
def file
  @file ||= FileClass.new self
end

# lazily-created Dir-like facade for this storage (see DirClass)
def dir
  @dir ||= DirClass.new self
end
# Resolve +path+ to its Dirent by walking down from the root one
# component at a time. Returns nil if any component doesn't exist,
# or if an intermediate component is a file rather than a directory.
def dirent_from_path path
  parts = file.expand_path(path).split('/').reject(&:empty?)
  parts.inject(@root) do |dirent, part|
    return nil if dirent.file?
    child = dirent / part
    return nil unless child
    child
  end
end
class FileClass
# A File::Stat-like class for dirents, exposing size/type/block
# information. Most posix fields (nlink, uid, dev, ino, ...) have no
# ole analog and are simply 0 or 1.
class Stat
  attr_reader :ftype, :size, :blocks, :blksize
  attr_reader :nlink, :uid, :gid, :dev, :rdev, :ino
  def initialize dirent
    @dirent = dirent
    @size = dirent.size
    if file?
      @ftype = 'file'
      # block count/size come from whichever allocation table backs a
      # stream of this size (small vs big bat)
      bat = dirent.ole.bat_for_size(dirent.size)
      @blocks = bat.chain(dirent.first_block).length
      @blksize = bat.block_size
    else
      @ftype = 'directory'
      @blocks = 0
      @blksize = 0
    end
    # a lot of these are bogus. ole file format has no analogs
    @nlink = 1
    @uid, @gid = 0, 0
    @dev, @rdev = 0, 0
    @ino = 0
    # need to add times - atime, mtime, ctime.
  end
  alias rdev_major :rdev
  alias rdev_minor :rdev
  def file?
    @dirent.file?
  end
  def directory?
    @dirent.dir?
  end
  # as per File::Stat#size? - the size, but only for files
  def size?
    size if file?
  end
  def inspect
    # instance_variables returns Symbols on ruby >= 1.9, so stringify
    # them before excluding @dirent. previously subtracting the String
    # '@dirent' had no effect and the whole dirent leaked into the
    # inspect output.
    pairs = (instance_variables.map(&:to_s) - ['@dirent']).map do |n|
      "#{n[1..-1]}=#{instance_variable_get n}"
    end
    "#<#{self.class} #{pairs * ', '}>"
  end
end
def initialize ole
  @ole = ole
end

# make +path+ absolute by prefixing the current working directory if
# needed, then normalise away any '.' and '..' components.
def expand_path path
  # its already absolute if it starts with a '/'
  unless path =~ /^\//
    # get the raw stored pwd value (its blank for root)
    pwd = @ole.dir.instance_variable_get :@pwd
    path = "#{pwd}/#{path}"
  end
  # at this point its already absolute. we use File.expand_path
  # just for the .. and . handling
  # No longer use RUBY_PLATFORM =~ /win/ as it matches darwin. better way?
  if File::ALT_SEPARATOR != "\\"
    File.expand_path(path)
  else
    # on windows File.expand_path prepends a drive letter ("C:/...");
    # strip those two characters off again
    File.expand_path(path)[2..-1]
  end
end
# +orig_path+ is just so that we can use the requested path
# in the error messages even if it has been already modified
def dirent_from_path path, orig_path=nil
  orig_path ||= path
  dirent = @ole.dirent_from_path path
  raise Errno::ENOENT, orig_path unless dirent
  # this resolver is file-only; directories raise EISDIR as ::File does
  raise Errno::EISDIR, orig_path if dirent.dir?
  dirent
end
private :dirent_from_path

# true if +path+ names an existing file or directory
def exists? path
  !!@ole.dirent_from_path(path)
end
alias exist? :exists?

# true if +path+ exists and is a file
def file? path
  dirent = @ole.dirent_from_path path
  dirent and dirent.file?
end

# true if +path+ exists and is a directory
def directory? path
  dirent = @ole.dirent_from_path path
  dirent and dirent.dir?
end
# open the file at +path+, mimicking File.open. when +mode+ implies
# creation ('w', 'a', ...) a missing file is created under its parent
# directory; otherwise a missing file raises Errno::ENOENT.
def open path, mode='r', &block
  if IOMode.new(mode).create?
    begin
      dirent = dirent_from_path path
    rescue Errno::ENOENT
      # maybe instead of repeating this everywhere, i should have
      # a get_parent_dirent function.
      parent_path, basename = File.split expand_path(path)
      parent = @ole.dir.send :dirent_from_path, parent_path, path
      parent << dirent = Dirent.new(@ole, :type => :file, :name => basename)
    end
  else
    dirent = dirent_from_path path
  end
  dirent.open mode, &block
end

# explicit wrapper instead of alias to inhibit block
def new path, mode='r'
  open path, mode
end
# returns the size of the file at +path+, or 0 for a directory
def size path
  begin
    dirent_from_path(path).size
  rescue Errno::EISDIR
    # kind of arbitrary. I'm getting 4096 from ::File, but
    # the zip tests want 0.
    0
  end
end

# like #size, but returns nil (as per File.size?) for missing paths
# and for directories, rather than raising or returning 0
def size? path
  begin
    dirent_from_path(path).size
  rescue Errno::ENOENT, Errno::EISDIR
    # any other exceptions i need to rescue?
    nil
  end
end

# File.stat analog; unlike the private resolver this allows directories
def stat path
  dirent = @ole.dirent_from_path path
  raise Errno::ENOENT, path if dirent.nil?
  Stat.new dirent
end

# slurp the entire contents of the file at +path+
def read path
  open(path, &:read)
end
# most of the work this function does is moving the dirent between
# 2 parents. the actual name changing is quite simple.
# File.rename can move a file into another folder, which is why i've
# done it too, though i think its not always possible...
#
# FIXME File.rename can be used for directories too....
def rename from_path, to_path
  # check what we want to rename from exists. do it this
  # way to allow directories.
  dirent = @ole.dirent_from_path from_path
  raise Errno::ENOENT, from_path unless dirent
  # delete what we want to rename to if necessary
  begin
    unlink to_path
  rescue Errno::ENOENT
    # we actually get here, but rcov doesn't think so. add 1 + 1 to
    # keep rcov happy for now... :)
    1 + 1
  end
  # reparent the dirent
  to_parent_path, to_basename = File.split expand_path(to_path)
  from_parent = dirent.parent
  to_parent = @ole.dir.send :dirent_from_path, to_parent_path, to_path
  from_parent.delete dirent, false
  # and also change its name
  dirent.name = to_basename
  to_parent << dirent
  0
end

# removes each of +paths+. files only - a directory raises EISDIR
# via the private dirent_from_path. returns the number of paths.
def unlink(*paths)
  paths.each do |path|
    dirent = dirent_from_path path
    dirent.parent.delete dirent
  end
  paths.length # hmmm. as per ::File ?
end
alias delete :unlink
end
#
# An *instance* of this class is supposed to provide similar methods
# to the class methods of Dir itself.
#
# Fairly complete - like zip/zipfilesystem's implementation, i provide
# everything except chroot and glob. glob could be done with a glob
# to regex conversion, and then simply match in the entries array...
# although recursive glob complicates that somewhat.
#
# Dir.chroot, Dir.glob, Dir.[], and Dir.tmpdir is the complete list of
# methods still missing.
#
class DirClass
def initialize ole
  @ole = ole
  # current working directory, stored without a trailing slash
  # (blank string for the root)
  @pwd = ''
end

# +orig_path+ is just so that we can use the requested path
# in the error messages even if it has been already modified
def dirent_from_path path, orig_path=nil
  orig_path ||= path
  dirent = @ole.dirent_from_path path
  raise Errno::ENOENT, orig_path unless dirent
  # this resolver is directory-only; files raise ENOTDIR as ::Dir does
  raise Errno::ENOTDIR, orig_path unless dirent.dir?
  dirent
end
private :dirent_from_path
# Dir.open analog. returns (or yields) a Dir instance whose entry
# list is snapshotted at open time.
def open path
  dir = Dir.new path, entries(path)
  return dir unless block_given?
  yield dir
end

# as for file, explicit alias to inhibit block
def new path
  open path
end

# pwd is always stored without the trailing slash. we handle
# the root case here
def pwd
  return '/' if @pwd.empty?
  @pwd
end
alias getwd :pwd
# Dir.chdir analog. with a block, the working directory is changed
# only for the duration of the block (restored even on exceptions);
# without a block it is changed permanently and 0 is returned.
def chdir orig_path
  # normalise: make absolute, squeeze slashes, drop any trailing slash
  path = @ole.file.expand_path(orig_path).squeeze('/').sub(/\/$/, '')
  # raises (with the original path in the message) if path is invalid
  dirent_from_path path, orig_path
  unless block_given?
    @pwd = path
    return 0
  end
  saved_pwd = @pwd
  @pwd = path
  begin
    yield
  ensure
    @pwd = saved_pwd
  end
end
# returns the names in the directory at +path+, including '.' and '..',
# warning about any entries unreachable through the path-based api.
def entries path
  dirent = dirent_from_path path
  # Not sure about adding on the dots...
  entries = %w[. ..] + dirent.children.map(&:name)
  # do some checks about un-reachable files
  seen = {}
  entries.each do |n|
    Log.warn "inaccessible file (filename contains slash) - #{n.inspect}" if n['/']
    Log.warn "inaccessible file (duplicate filename) - #{n.inspect}" if seen[n]
    seen[n] = true
  end
  entries
end

# Dir.foreach analog - yields each entry name in turn
def foreach path, &block
  entries(path).each(&block)
end
# create the directory +path+. raises EEXIST if anything (file or
# directory) already has that name.
def mkdir path
  parent_path, basename = File.split @ole.file.expand_path(path)
  # note that we will complain about the full path despite accessing
  # the parent path. this is consistent with ::Dir
  parent = dirent_from_path parent_path, path
  # now, we first should ensure that it doesn't already exist
  # either as a file or a directory.
  raise Errno::EEXIST, path if parent/basename
  parent << Dirent.new(@ole, :type => :dir, :name => basename)
  0
end

# remove the directory at +path+, which must be empty
def rmdir path
  dirent = dirent_from_path path
  raise Errno::ENOTEMPTY, path unless dirent.children.empty?
  dirent.parent.delete dirent
  0 # hmmm. as per ::Dir ?
end
alias delete :rmdir
alias unlink :rmdir
# note that there is nothing remotely ole specific about
# this class. it simply provides the dir like sequential access
# methods on top of an array.
class Dir
  include Enumerable

  attr_reader :path

  def initialize path, entries
    @path = path
    @entries = entries
    @pos = 0
    @closed = false
  end

  # the current read position within the entry list
  def pos
    assert_open
    @pos
  end

  def each(&block)
    assert_open
    @entries.each(&block)
  end

  def close
    @closed = true
  end

  # returns the entry at the current position and advances, or nil
  # once all entries have been consumed
  def read
    assert_open
    entry = @entries[@pos]
    @pos += 1 if @pos < @entries.length
    entry
  end

  # reposition, clamped to the valid range 0..entries.length
  def pos= new_pos
    assert_open
    @pos = [[new_pos, 0].max, @entries.length].min
  end

  alias tell :pos
  alias seek :pos=

  def rewind
    seek 0
  end

  private

  # every accessor raises IOError once the dir has been closed
  def assert_open
    raise IOError if @closed
  end
end
end
end
end
ruby-ole-1.2.11.8/lib/ole/storage/meta_data.rb 0000644 0000041 0000041 00000010735 12461443715 021002 0 ustar www-data www-data # encoding: ASCII-8BIT
require 'ole/types/property_set'
module Ole
class Storage
#
# The MetaData class is designed to be high level interface to all the
# underlying meta data stored within different sections, themselves within
# different property set streams.
#
# With this class, you can simply get properties using their names, without
# needing to know about the underlying guids, property ids etc.
#
# Example:
#
# Ole::Storage.open('test.doc') { |ole| p ole.meta_data.doc_author }
#
# TODO:
#
# * add write support
# * fix some of the missing type coercion (eg FileTime)
# * maybe add back the ability to access individual property sets as a unit
# directly. ie ole.summary_information. Is this useful?
# * full key support, for unknown keys, like
# ole.meta_data[myguid, myid]. probably needed for user-defined
# properties too.
#
class MetaData
include Enumerable
FILE_MAP = {
Types::PropertySet::FMTID_SummaryInformation => "\005SummaryInformation",
Types::PropertySet::FMTID_DocSummaryInfo => "\005DocumentSummaryInformation"
}
FORMAT_MAP = {
'MSWordDoc' => :doc
}
CLSID_EXCEL97 = Types::Clsid.parse "{00020820-0000-0000-c000-000000000046}"
CLSID_EXCEL95 = Types::Clsid.parse "{00020810-0000-0000-c000-000000000046}"
CLSID_WORD97 = Types::Clsid.parse "{00020906-0000-0000-c000-000000000046}"
CLSID_WORD95 = Types::Clsid.parse "{00020900-0000-0000-c000-000000000046}"
CLSID_MAP = {
CLSID_EXCEL97 => :xls,
CLSID_EXCEL95 => :xls,
CLSID_WORD97 => :doc,
CLSID_WORD95 => :doc
}
MIME_TYPES = {
:xls => 'application/vnd.ms-excel',
:doc => 'application/msword',
:ppt => 'application/vnd.ms-powerpoint',
# not registered at IANA, but seems most common usage
:msg => 'application/vnd.ms-outlook',
# this is my default fallback option. also not registered at IANA.
# file(1)'s default is application/msword, which is useless...
nil => 'application/x-ole-storage'
}
def initialize ole
  @ole = ole
end

# i'm thinking of making file_format and mime_type available through
# #[], #each, and #to_h also, as calculated meta data (not assignable)

# parse the "\001CompObj" stream, which carries the username and the
# human-readable file format. returns a hash, empty if the stream is
# missing.
def comp_obj
  return {} unless dirent = @ole.root["\001CompObj"]
  data = dirent.read
  # see - https://gnunet.org/svn/Extractor/doc/StarWrite_File_Format.html
  # compobj_version: 0x0001
  # byte_order: 0xffe
  # windows_version: 0x00000a03 (win31 apparently)
  # marker: 0xffffffff
  # compobj_version, byte_order, windows_version, marker, clsid =
  # data.unpack("vvVVa#{Types::Clsid::SIZE}")
  strings = []
  # strings start at offset 28, each a 32-bit little-endian length
  # followed by that many bytes; the stored length includes a trailing
  # NUL, hence len - 1 below
  i = 28
  while i < data.length
    len = data[i, 4].unpack('V').first
    i += 4
    strings << data[i, len - 1]
    i += len
  end
  # in the unknown chunk, you usually see something like 'Word.Document.6'
  {:username => strings[0], :file_format => strings[1], :unknown => strings[2..-1]}
end
private :comp_obj
# the human-readable file format string from the CompObj stream,
# eg "MSWordDoc", or nil when unavailable
def file_format
  comp_obj[:file_format]
end

# guess the container's mime type using progressively weaker signals:
# the CompObj format string, then the root CLSID, then well-known
# stream names, and finally a generic ole fallback.
def mime_type
  # based on the CompObj stream contents
  type = FORMAT_MAP[file_format]
  return MIME_TYPES[type] if type
  # based on the root clsid
  type = CLSID_MAP[Types::Clsid.load(@ole.root.clsid)]
  return MIME_TYPES[type] if type
  # fallback to heuristics
  has_file = Hash[*@ole.root.children.map { |d| [d.name.downcase, true] }.flatten]
  return MIME_TYPES[:msg] if has_file['__nameid_version1.0'] or has_file['__properties_version1.0']
  return MIME_TYPES[:doc] if has_file['worddocument'] or has_file['document']
  return MIME_TYPES[:xls] if has_file['workbook'] or has_file['book']
  MIME_TYPES[nil]
end
# look up a single named property, eg meta_data[:doc_author].
# returns nil for unknown keys or missing property set streams.
def [] key
  pair = Types::PropertySet::PROPERTY_MAP[key.to_s] or return nil
  file = FILE_MAP[pair.first] or return nil
  dirent = @ole.root[file] or return nil
  dirent.open { |io| return Types::PropertySet.new(io)[key] }
end

def []= key, value
  raise NotImplementedError, 'meta data writes not implemented'
end

# yields each (name, value) pair across all known property set streams
def each(&block)
  FILE_MAP.values.each do |file|
    dirent = @ole.root[file] or next
    dirent.open { |io| Types::PropertySet.new(io).each(&block) }
  end
end

# all properties collected into a symbol-keyed hash
def to_h
  inject({}) { |hash, (name, value)| hash.update name.to_sym => value }
end

# provides the pseudo-accessors (ole.meta_data.doc_author etc),
# backed by #[], for any name present in the property map
def method_missing name, *args, &block
  return super unless args.empty?
  return super unless Types::PropertySet::PROPERTY_MAP[name.to_s]
  self[name]
end

# keep respond_to? consistent with the dynamic accessors above
# (previously missing, so respond_to?(:doc_author) wrongly returned false)
def respond_to_missing? name, include_private=false
  !!Types::PropertySet::PROPERTY_MAP[name.to_s] || super
end
end
# lazily-created MetaData facade for this storage object
def meta_data
  @meta_data ||= MetaData.new(self)
end
end
end
ruby-ole-1.2.11.8/lib/ole/storage/base.rb 0000644 0000041 0000041 00000077604 12461443715 020005 0 ustar www-data www-data # encoding: ASCII-8BIT
require 'tempfile'
require 'ole/base'
require 'ole/types'
require 'ole/ranges_io'
module Ole # :nodoc:
#
# This class is the primary way the user interacts with an OLE storage file.
#
# = TODO
#
# * the custom header cruft for Header and Dirent needs some love.
# * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
# and, in a manner of speaking, but arguably different, Storage itself.
# they have differing api's which would be nice to rethink.
# AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
#
class Storage
# thrown for any bogus OLE file errors.
class FormatError < StandardError # :nodoc:
end

# options used at creation time (eg :update_timestamps)
attr_reader :params
# The top of the ole tree structure
attr_reader :root
# The tree structure in its original flattened form. only valid after #load, or #flush.
attr_reader :dirents
# The underlying io object to/from which the ole object is serialized, whether we
# should close it, and whether it is writeable
attr_reader :io, :close_parent, :writeable
# Low level internals, you probably shouldn't need to mess with these
attr_reader :header, :bbat, :sbat, :sb_file
# +arg+ should be either a filename, or an +IO+ object, and needs to be seekable.
# +mode+ is optional, and should be a regular mode string.
# +params+ supports :update_timestamps (default true).
def initialize arg, mode=nil, params={}
  # allow new(arg, params) by shuffling the arguments
  params, mode = mode, nil if Hash === mode
  params = {:update_timestamps => true}.merge(params)
  @params = params
  # get the io object
  @close_parent, @io = if String === arg
    mode ||= 'rb'
    [true, open(arg, mode)]
  else
    raise ArgumentError, 'unable to specify mode string with io object' if mode
    [false, arg]
  end
  # force encoding, to avoid picking up source encoding with StringIO or files in text mode
  @io.set_encoding Encoding::ASCII_8BIT if @io.respond_to?(:set_encoding)
  # do we have this file opened for writing? use mode when provided,
  # otherwise try no-op methods which will raise if read-only
  @writeable = begin
    if mode
      IOMode.new(mode).writeable?
    else
      # works on mri 1.8 & jruby
      @io.flush
      begin
        # works on mri 1.9 & rubinius, throws EBADF on windows
        @io.write_nonblock('') if @io.respond_to?(:write_nonblock)
      rescue Errno::EBADF
        # for windows
        @io.syswrite('');
      end
      true
    end
  rescue IOError
    false
  end
  # silence undefined warning in clear
  @sb_file = nil
  # if the io object has data, we should load it, otherwise start afresh
  # this should be based on the mode string rather.
  @io.size > 0 ? load : clear
end
# somewhat similar to File.open, the open class method allows a block form where
# the Ole::Storage object is automatically closed on completion of the block.
def self.open arg, mode=nil, params={}
  ole = new arg, mode, params
  return ole unless block_given?
  begin
    yield ole
  ensure
    ole.close
  end
end
# load document from file.
#
# TODO: implement various allocationtable checks, maybe as a AllocationTable#fsck function :)
#
# 1. reterminate any chain not ending in EOC.
#    compare file size with actually allocated blocks per file.
# 2. pass through all chain heads looking for collisions, and making sure nothing points to them
#    (ie they are really heads). in both sbat and mbat
# 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
#    in the bat for them.
# 4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size,
#    (eg what is used for truncate in #flush), then maybe add some sort of message about that. it
#    will be automatically thrown away at close time.
def load
  # we always read 512 for the header block. if the block size ends up being different,
  # what happens to the 109 fat entries. are there more/less entries?
  @io.rewind
  header_block = @io.read 512
  @header = Header.new header_block
  # create an empty bbat.
  @bbat = AllocationTable::Big.new self
  # the first 109 bbat block pointers live in the header block itself;
  # further ones are chained through the meta-bat (mbat) blocks
  bbat_chain = header_block[Header::SIZE..-1].unpack 'V*'
  mbat_block = @header.mbat_start
  @header.num_mbat.times do
    blocks = @bbat.read([mbat_block]).unpack 'V*'
    # the final entry of each mbat block points at the next mbat block
    mbat_block = blocks.pop
    bbat_chain += blocks
  end
  # am i using num_bat in the right way?
  @bbat.load @bbat.read(bbat_chain[0, @header.num_bat])
  # get block chain for directories, read it, then split it into chunks and load the
  # directory entries. semantics changed - used to cut at first dir where dir.type == 0
  @dirents = @bbat.read(@header.dirent_start).to_enum(:each_chunk, Dirent::SIZE).
    map { |str| Dirent.new self, str }
  # now reorder from flat into a tree
  # links are stored in some kind of balanced binary tree
  # check that everything is visited at least, and at most once
  # similarly with the blocks of the file.
  # was thinking of moving this to Dirent.to_tree instead.
  class << @dirents
    def to_tree idx=0
      return [] if idx == Dirent::EOT
      d = self[idx]
      to_tree(d.child).each { |child| d << child }
      raise FormatError, "directory #{d.inspect} used twice" if d.idx
      d.idx = idx
      to_tree(d.prev) + [d] + to_tree(d.next)
    end
  end
  @root = @dirents.to_tree.first
  @dirents.reject! { |d| d.type_id == 0 }
  # silence this warning by default, its not really important (issue #5).
  # fairly common one appears to be "R" (from office OS X?) which smells
  # like some kind of UTF16 snafu, but scottwillson also has had some kanji...
  #Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry'
  unused = @dirents.reject(&:idx).length
  Log.warn "#{unused} unused directories" if unused > 0
  # FIXME i don't currently use @header.num_sbat which i should
  # hmm. nor do i write it. it means what exactly again?
  # which mode to use here?
  @sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size
  @sbat = AllocationTable::Small.new self
  @sbat.load @bbat.read(@header.sbat_start)
end
# close the storage: flush pending meta data if we are writeable,
# and close the underlying io only if we opened it ourselves.
def close
  @sb_file.close
  flush if @writeable
  @io.close if @close_parent
end
# the flush method is the main "save" method. all file contents are always
# written directly to the file by the RangesIO objects, all this method does
# is write out all the file meta data - dirents, allocation tables, file header
# etc.
#
# maybe add an option to zero the padding, and any remaining avail blocks in the
# allocation table.
#
# TODO: long and overly complex. simplify and test better. eg, perhaps move serialization
# of bbat to AllocationTable::Big.
def flush
  # update root dirent, and flatten dirent tree
  @root.name = 'Root Entry'
  @root.first_block = @sb_file.first_block
  @root.size = @sb_file.size
  @dirents = @root.flatten
  # serialize the dirents using the bbat
  RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io|
    io.write @dirents.map { |dirent| dirent.to_s }.join
    # pad the dirent stream out to a whole number of blocks
    padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
    io.write 0.chr * padding
    @header.dirent_start = io.first_block
  end
  # serialize the sbat
  # perhaps the blocks used by the sbat should be marked with BAT?
  RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io|
    io.write @sbat.to_s
    @header.sbat_start = io.first_block
    @header.num_sbat = @bbat.chain(@header.sbat_start).length
  end
  # create RangesIOResizeable hooked up to the bbat. use that to claim bbat blocks using
  # truncate. then when its time to write, convert that chain and some chunk of blocks at
  # the end, into META_BAT blocks. write out the chain, and those meta bat blocks, and its
  # done.
  # this is perhaps not good, as we reclaim all bat blocks here, which
  # may include the sbat we just wrote. FIXME
  @bbat.map! do |b|
    b == AllocationTable::BAT || b == AllocationTable::META_BAT ? AllocationTable::AVAIL : b
  end
  # currently we use a loop. this could be better, but basically,
  # the act of writing out the bat, itself requires blocks which get
  # recorded in the bat.
  #
  # i'm sure that there'd be some simpler closed form solution to this. solve
  # recursive func:
  #
  #   num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0))
  #   bbat_len = initial_bbat_len + num_mbat_blocks
  #   mbat_len = ceil(bbat_len * 4 / block_size)
  #
  # the actual bbat allocation table is itself stored throughout the file, and that chain
  # is stored in the initial blocks, and the mbat blocks.
  num_mbat_blocks = 0
  io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC
  # truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a
  # contiguous chunk at the end.
  # hmmm, i think this truncate should be matched with a truncate of the underlying io. if you
  # delete a lot of stuff, and free up trailing blocks, the file size never shrinks. this can
  # be fixed easily, add an io truncate
  @bbat.truncate!
  @io.truncate @bbat.block_size * (@bbat.length + 1)
  # iterate until the space needed for the bat and the mbat stabilises
  while true
    # get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of
    # the mbat blocks. we can't just add the mbat blocks directly to the bbat, as as this iteration
    # progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the
    # mbat must remain contiguous.
    bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size
    # now storing the excess mbat blocks also increases the size of the bbat:
    new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / (@bbat.block_size.to_f - 4)).ceil
    if new_num_mbat_blocks != num_mbat_blocks
      # need more space for the mbat.
      num_mbat_blocks = new_num_mbat_blocks
    elsif io.size != bbat_data_len
      # need more space for the bat
      # this may grow the bbat, depending on existing available blocks
      io.truncate bbat_data_len
    else
      break
    end
  end
  # now extract the info we want:
  ranges = io.ranges
  bbat_chain = @bbat.chain io.first_block
  io.close
  bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
  # tack on the mbat stuff
  @header.num_bat = bbat_chain.length
  mbat_blocks = (0...num_mbat_blocks).map do
    block = @bbat.free_block
    @bbat[block] = AllocationTable::META_BAT
    block
  end
  @header.mbat_start = mbat_blocks.first || AllocationTable::EOC
  # now finally write the bbat, using a not resizable io.
  # the mode here will be 'r', which allows write atm.
  RangesIO.open(@io, :ranges => ranges) { |f| f.write @bbat.to_s }
  # this is the mbat. pad it out.
  bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
  @header.num_mbat = num_mbat_blocks
  if num_mbat_blocks != 0
    # write out the mbat blocks now. first of all, where are they going to be?
    mbat_data = bbat_chain[109..-1]
    # expand the mbat_data to include the linked list forward pointers.
    mbat_data = mbat_data.to_enum(:each_slice, @bbat.block_size / 4 - 1).to_a.
      zip(mbat_blocks[1..-1] + [nil]).map { |a, b| b ? a + [b] : a }
    # pad out the last one.
    mbat_data.last.push(*([AllocationTable::AVAIL] * (@bbat.block_size / 4 - mbat_data.last.length)))
    RangesIO.open @io, :ranges => @bbat.ranges(mbat_blocks) do |f|
      f.write mbat_data.flatten.pack('V*')
    end
  end
  # now seek back and write the header out
  @io.seek 0
  @io.write @header.to_s + bbat_chain[0, 109].pack('V*')
  @io.flush
end
# Reset this storage to the equivalent of a freshly created, empty ole
# document, discarding all current dirents, allocation tables and data.
# Truncates the underlying io to zero length, so it requires a writeable
# backing (warns otherwise).
def clear
# initialize to equivalent of loading an empty ole document.
Log.warn 'creating new ole storage object on non-writable io' unless @writeable
@header = Header.new
@bbat = AllocationTable::Big.new self
# the minimal dirent tree is a single root entry
@root = Dirent.new self, :type => :root, :name => 'Root Entry'
@dirents = [@root]
@root.idx = 0
# replace any existing small-block backing stream with a fresh empty one
@sb_file.close if @sb_file
@sb_file = RangesIOResizeable.new @bbat, :first_block => AllocationTable::EOC
@sbat = AllocationTable::Small.new self
# throw everything else the hell away
@io.truncate 0
end
# could be useful with mis-behaving ole documents. or to just clean them up.
# Rewrite the whole document through a scratch io and load the result back
# over this object - compacts fragmented or mis-behaving ole documents.
#
# +temp+ selects the scratch backing: :file (a binary Tempfile, the
# default) or :mem (an in-memory StringIO). Any other value raises
# ArgumentError.
def repack temp=:file
  if temp == :file
    Tempfile.open 'ole-repack' do |backing|
      backing.binmode
      repack_using_io backing
    end
  elsif temp == :mem
    StringIO.open '' do |backing|
      repack_using_io backing
    end
  else
    raise ArgumentError, "unknown temp backing #{temp.inspect}"
  end
end
# Implements #repack: copy the raw document bytes into +temp_io+, reset
# this object to an empty document with #clear, then recursively copy the
# dirent tree back from a Storage opened on the scratch copy.
def repack_using_io temp_io
@io.rewind
IO.copy @io, temp_io
clear
Storage.open temp_io, nil, @params do |temp_ole|
#temp_ole.root.type = :dir
Dirent.copy temp_ole.root, root
end
end
# Pick the allocation table appropriate for a stream of +size+ bytes:
# streams at or above the header threshold live in big blocks, smaller
# ones in the small block file.
def bat_for_size size
  # note >=, not > previously.
  if size >= @header.threshold
    @bbat
  else
    @sbat
  end
end
# Short human-readable summary naming the backing io and the root dirent.
def inspect
  '#<%s io=%s root=%s>' % [self.class, @io.inspect, @root.inspect]
end
#
# A class which wraps the ole header
#
# Header.new can be both used to load from a string, or to create from
# defaults. Serialization is accomplished with the #to_s method.
#
class Header < Struct.new(
:magic, :clsid, :minor_ver, :major_ver, :byte_order, :b_shift, :s_shift,
:reserved, :csectdir, :num_bat, :dirent_start, :transacting_signature, :threshold,
:sbat_start, :num_sbat, :mbat_start, :num_mbat
)
# unpack format for the fixed SIZE byte header record
PACK = 'a8 a16 v2 a2 v2 a6 V3 a4 V5'
SIZE = 0x4c
# i have seen it pointed out that the first 4 bytes of hex,
# 0xd0cf11e0, is supposed to spell out docfile. hmmm :)
MAGIC = "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" # expected value of Header#magic
# what you get if creating new header from scratch.
# AllocationTable::EOC isn't available yet. meh.
EOC = 0xfffffffe
DEFAULT = [
MAGIC, 0.chr * 16, 59, 3, "\xfe\xff", 9, 6,
0.chr * 6, 0, 1, EOC, 0.chr * 4,
4096, EOC, 0, EOC, 0
]
# +values+ may be a binary string (unpacked with PACK), an array of
# field values, or omitted for a default empty-document header.
# Raises FormatError via #validate! if the result is not sane.
def initialize values=DEFAULT
values = values.unpack(PACK) if String === values
super(*values)
validate!
end
# serialize back to the fixed-size binary header form.
def to_s
to_a.pack PACK
end
# Sanity check the header fields: hard FormatError for values that make
# the file unreadable, Log.warn for merely suspicious ones.
def validate!
raise FormatError, "OLE2 signature is invalid" unless magic == MAGIC
if num_bat == 0 or # is that valid for a completely empty file?
# not sure about this one. basically to do max possible bat given size of mbat
num_bat > 109 && num_bat > 109 + num_mbat * (1 << b_shift - 2) or
# shouldn't need to use the mbat as there is enough space in the header block
num_bat < 109 && num_mbat != 0 or
# given the size of the header is 76, if b_shift <= 6, blocks address the header.
s_shift > b_shift or b_shift <= 6 or b_shift >= 31 or
# we only handle little endian
byte_order != "\xfe\xff"
raise FormatError, "not valid OLE2 structured storage file"
end
# relaxed this, due to test-msg/qwerty_[1-3]*.msg they all had
# 3 for this value.
# transacting_signature != "\x00" * 4 or
if threshold != 4096 or
num_mbat == 0 && ![AllocationTable::EOC, AllocationTable::AVAIL].include?(mbat_start) or
reserved != "\x00" * 6
Log.warn "may not be a valid OLE2 structured storage file"
end
true
end
end
#
# +AllocationTable+'s hold the chains corresponding to files. Given
# an initial index, AllocationTable#chain follows the chain, returning
# the blocks that make up that file.
#
# There are 2 allocation tables, the bbat, and sbat, for big and small
# blocks respectively. The block chain should be loaded using either
# Storage#read_big_blocks or Storage#read_small_blocks
# as appropriate.
#
# Whether or not big or small blocks are used for a file depends on
# whether its size is over the Header#threshold level.
#
# An Ole::Storage document is serialized as a series of directory objects,
# which are stored in blocks throughout the file. The blocks are either
# big or small, and are accessed using the AllocationTable.
#
# The bbat allocation table's data is stored in the spare room in the header
# block, and in extra blocks throughout the file as referenced by the meta
# bat. That chain is linear, as there is no higher level table.
#
# AllocationTable.new is used to create an empty table. It can parse a string
# with the #load method. Serialization is accomplished with the #to_s method.
#
class AllocationTable < Array
# a free block (I don't currently leave any blocks free), although I do pad out
# the allocation table with AVAIL to the block size.
AVAIL = 0xffffffff
EOC = 0xfffffffe # end of a chain
# these blocks are used for storing the allocation table chains
BAT = 0xfffffffd
META_BAT = 0xfffffffc
attr_reader :ole, :io, :block_size
# +ole+ is the owning Storage. @sparse tracks whether an AVAIL entry may
# exist somewhere before the end of the table (see #free_block).
def initialize ole
@ole = ole
@sparse = true
super()
end
# replace table contents with 32-bit little-endian ints unpacked from +data+.
def load data
replace data.unpack('V*')
end
# return a copy of the table with trailing AVAIL entries stripped.
def truncate
# this strips trailing AVAILs. come to think of it, this has the potential to break
# bogus ole. if you terminate using AVAIL instead of EOC, like I did before. but that is
# very broken. however, if a chain ends with AVAIL, it should probably be fixed to EOC
# at load time.
temp = reverse
not_avail = temp.find { |b| b != AVAIL } and temp = temp[temp.index(not_avail)..-1]
temp.reverse
end
# in-place version of #truncate.
def truncate!
replace truncate
end
# Serialize, padded with AVAIL to a multiple of the big block size.
# note the pad uses @ole.bbat's block size, not our own - presumably
# because the serialized table itself lives in big blocks.
def to_s
table = truncate
# pad it out some
num = @ole.bbat.block_size / 4
# do you really use AVAIL? they probably extend past end of file, and may shortly
# be used for the bat. not really good.
table += [AVAIL] * (num - (table.length % num)) if (table.length % num) != 0
table.pack 'V*'
end
# Follow the linked list starting at +idx+, returning the array of block
# indices in the chain. Stops at any special value (>= META_BAT),
# warning unless the terminator was a proper EOC.
# rewrote this to be non-recursive as it broke on a large attachment
# chain with a stack error
def chain idx
a = []
until idx >= META_BAT
raise FormatError, "broken allocationtable chain" if idx < 0 || idx > length
a << idx
idx = self[idx]
end
Log.warn "invalid chain terminator #{idx}" unless idx == EOC
a
end
# Turn a chain (an array given by +chain+) of blocks (optionally
# truncated to +size+) into an array of arrays describing the stretches of
# bytes in the file that it belongs to.
#
# The blocks are Big or Small blocks depending on the table type.
def blocks_to_ranges chain, size=nil
# truncate the chain if required
chain = chain[0, (size.to_f / block_size).ceil] if size
# convert chain to ranges of the block size
ranges = chain.map { |i| [block_size * i, block_size] }
# truncate final range if required
ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size
ranges
end
# as #blocks_to_ranges, but +chain+ may also be given as a head block index.
def ranges chain, size=nil
chain = self.chain(chain) unless Array === chain
blocks_to_ranges chain, size
end
# quick shortcut. chain can be either a head (in which case the table is used to
# turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
def open chain, size=nil, &block
RangesIO.open @io, :ranges => ranges(chain, size), &block
end
# read the entire contents of the given chain/head in one go.
def read chain, size=nil
open chain, size, &:read
end
# catch any method that may add an AVAIL somewhere in the middle, thus invalidating
# the @sparse speedup for free_block. annoying using eval, but define_method won't
# work for this.
# FIXME
[:map!, :collect!].each do |name|
eval <<-END
def #{name}(*args, &block)
@sparse = true
super
end
END
end
# element assignment may free a block, re-enabling the sparse scan.
def []= idx, val
@sparse = true if val == AVAIL
super
end
# Index of a block free for use: an existing AVAIL entry if the table may
# be sparse, otherwise a fresh entry appended to the end.
def free_block
if @sparse
i = index(AVAIL) and return i
@sparse = false
end
push AVAIL
length - 1
end
# Grow or shrink the chain given by +blocks+ so it can hold +size+ bytes,
# allocating or freeing table entries as needed.
# must return first_block. modifies +blocks+ in place
def resize_chain blocks, size
new_num_blocks = (size / block_size.to_f).ceil
old_num_blocks = blocks.length
if new_num_blocks < old_num_blocks
# de-allocate some of our old blocks. TODO maybe zero them out in the file???
(new_num_blocks...old_num_blocks).each { |i| self[blocks[i]] = AVAIL }
self[blocks[new_num_blocks-1]] = EOC if new_num_blocks > 0
blocks.slice! new_num_blocks..-1
elsif new_num_blocks > old_num_blocks
# need some more blocks.
last_block = blocks.last
(new_num_blocks - old_num_blocks).times do
block = free_block
# connect the chain. handle corner case of blocks being [] initially
self[last_block] = block if last_block
blocks << block
last_block = block
self[last_block] = EOC
end
end
# update ranges, and return that also now
blocks
end
# allocation table for big (regular) blocks, backed by the document io.
class Big < AllocationTable
def initialize(*args)
super
@block_size = 1 << @ole.header.b_shift
@io = @ole.io
end
# Big blocks are kind of -1 based, in order to not clash with the header.
def blocks_to_ranges chain, size=nil
#super chain.map { |b| b + 1 }, size
# duplicated from AllocationTable#blocks_to_ranges to avoid chain.map
# which was decent part of benchmark profile
chain = chain[0, (size.to_f / block_size).ceil] if size
ranges = chain.map { |i| [block_size * (i + 1), block_size] }
ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size
ranges
end
end
# allocation table for small blocks, backed by the storage's sb_file.
class Small < AllocationTable
def initialize(*args)
super
@block_size = 1 << @ole.header.s_shift
@io = @ole.sb_file
end
end
end
# like normal RangesIO, but Ole::Storage specific. the ranges are backed by an
# AllocationTable, and can be resized. used for read/write to 2 streams:
# 1. serialized dirent data
# 2. sbat table data
# 3. all dirents but through RangesIOMigrateable below
#
# Note that all internal access to first_block is through accessors, as it is sometimes
# useful to redirect it.
class RangesIOResizeable < RangesIO
attr_reader :bat
attr_accessor :first_block
# +bat+ is the AllocationTable that owns this stream's block chain.
# +mode+ may be omitted (defaults to 'r'), in which case the params hash
# can be passed as the second argument. params must include :first_block;
# :size optionally limits the mapped extent.
def initialize bat, mode='r', params={}
mode, params = 'r', mode if Hash === mode
first_block, size = params.values_at :first_block, :size
raise ArgumentError, 'must specify first_block' unless first_block
@bat = bat
self.first_block = first_block
# we now cache the blocks chain, for faster resizing.
@blocks = @bat.chain first_block
super @bat.io, mode, :ranges => @bat.ranges(@blocks, size)
end
# Resize the underlying block chain to hold +size+ bytes, then refresh
# our ranges, first_block and position to match.
def truncate size
# note that old_blocks is != @ranges.length necessarily. i'm planning to write a
# merge_ranges function that merges sequential ranges into one as an optimization.
@bat.resize_chain @blocks, size
@pos = size if @pos > size
self.ranges = @bat.ranges(@blocks, size)
self.first_block = @blocks.empty? ? AllocationTable::EOC : @blocks.first
# don't know if this is required, but we explicitly request our @io to grow if necessary
# we never shrink it though. maybe this belongs in allocationtable, where smarter decisions
# can be made.
# maybe its ok to just seek out there later??
max = @ranges.map { |pos, len| pos + len }.max || 0
@io.truncate max if max > @io.size
end
end
# like RangesIOResizeable, but Ole::Storage::Dirent specific. provides for migration
# between bats based on size, and updating the dirent.
class RangesIOMigrateable < RangesIOResizeable
attr_reader :dirent
# Wrap +dirent+'s data stream, selecting the bat (big or small) that
# matches the dirent's current size.
def initialize dirent, mode='r'
@dirent = dirent
super @dirent.ole.bat_for_size(@dirent.size), mode,
:first_block => @dirent.first_block, :size => @dirent.size
end
# Resize to +size+ bytes. If that crosses the big/small threshold, the
# existing data is held in memory while the stream is re-homed in the
# other allocation table.
def truncate size
bat = @dirent.ole.bat_for_size size
if bat.class != @bat.class
# bat migration needed! we need to backup some data. the amount of data
# should be <= @ole.header.threshold, so we can just hold it all in one buffer.
# backup this
pos = [@pos, size].min
self.pos = 0
keep = read [@size, size].min
# this does a normal truncate to 0, removing our presence from the old bat, and
# rewrite the dirent's first_block
super 0
@bat = bat
# just change the underlying io from right under everyone :)
@io = bat.io
# important to do this now, before the write. as the below write will always
# migrate us back to sbat! this will now allocate us +size+ in the new bat.
super
self.pos = 0
write keep
self.pos = pos
else
super
end
# now just update the file
@dirent.size = size
end
# forward this to the dirent
def first_block
@dirent.first_block
end
def first_block= val
@dirent.first_block = val
end
end
#
# A class which wraps an ole directory entry. Can be either a directory
# (Dirent#dir?) or a file (Dirent#file?)
#
# Most interaction with Ole::Storage is through this class.
# The 2 most important functions are Dirent#children, and
# Dirent#data.
#
# was considering separate classes for dirs and files. some methods/attrs only
# applicable to one or the other.
#
# As with the other classes, #to_s performs the serialization.
#
class Dirent < Struct.new(
  :name_utf16, :name_len, :type_id, :colour, :prev, :next, :child,
  :clsid, :flags, # dirs only
  :create_time_str, :modify_time_str, # files only
  :first_block, :size, :reserved
)
  include RecursivelyEnumerable

  # pack format of the serialized 128 byte dirent record
  PACK = 'a64 v C C V3 a16 V a8 a8 V2 a4'
  SIZE = 128
  TYPE_MAP = {
    # this is temporary
    0 => :empty,
    1 => :dir,
    2 => :file,
    5 => :root
  }
  # something to do with the fact that the tree is supposed to be red-black
  COLOUR_MAP = {
    0 => :red,
    1 => :black
  }
  # used in the next / prev / child stuff to show that the tree ends here.
  # also used for first_block for directory.
  EOT = 0xffffffff
  DEFAULT = [
    0.chr * 2, 2, 0, # will get overwritten
    1, EOT, EOT, EOT,
    0.chr * 16, 0, nil, nil,
    AllocationTable::EOC, 0, 0.chr * 4
  ]

  # This returns all the children of this +Dirent+. It is filled in
  # when the tree structure is recreated.
  attr_reader :children
  attr_reader :name
  attr_reader :ole, :type, :create_time, :modify_time
  attr_reader :parent
  # i think its just used by the tree building
  attr_accessor :idx
  # these are for internal use and are used for faster lookup.
  attr_reader :name_lookup
  attr_writer :parent
  protected :name_lookup, :parent=

  # +ole+ is the owning Storage. +values+ may be a raw 128 byte record
  # (unpacked with PACK), an array of struct values, or a params hash
  # (in which case DEFAULT struct values are used). params may supply
  # :name and :type directly.
  def initialize ole, values=DEFAULT, params={}
    @ole = ole
    values, params = DEFAULT, values if Hash === values
    values = values.unpack(PACK) if String === values
    super(*values)
    # extra parsing from the actual struct values
    @name = params[:name] || Types::Variant.load(Types::VT_LPWSTR, name_utf16[0...name_len])
    @type = if params[:type]
      unless TYPE_MAP.values.include?(params[:type])
        raise ArgumentError, "unknown type #{params[:type].inspect}"
      end
      params[:type]
    else
      TYPE_MAP[type_id] or raise FormatError, "unknown type_id #{type_id.inspect}"
    end
    # further extra type specific stuff
    if file?
      default_time = @ole.params[:update_timestamps] ? Types::FileTime.now : nil
      @create_time ||= default_time
      @modify_time ||= default_time
      @create_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if create_time_str
      # BUGFIX: this previously re-parsed create_time_str here, so the
      # modify time silently came out equal to the create time.
      @modify_time = Types::Variant.load(Types::VT_FILETIME, modify_time_str) if modify_time_str
      @children = nil
      @name_lookup = nil
    else
      @create_time = nil
      @modify_time = nil
      self.size = 0 unless @type == :root
      @children = []
      @name_lookup = {}
    end
    @parent = nil
    # to silence warnings. used for tree building at load time
    # only.
    @idx = nil
  end

  # rename, keeping the parent's fast name lookup table in sync.
  def name= name
    if @parent
      map = @parent.instance_variable_get :@name_lookup
      map.delete @name
      map[name] = self
    end
    @name = name
  end

  # Open this (file) dirent's data as an io. With a block, the io is
  # yielded and closed afterwards; without, it is returned. Opening with
  # a writeable mode touches the modify time. Raises Errno::EISDIR for
  # non-file dirents.
  def open mode='r'
    raise Errno::EISDIR unless file?
    io = RangesIOMigrateable.new self, mode
    @modify_time = Types::FileTime.now if io.mode.writeable?
    if block_given?
      begin yield io
      ensure; io.close
      end
    else io
    end
  end

  # read up to +limit+ bytes of this file dirent's data (all of it if nil).
  def read limit=nil
    open { |io| io.read limit }
  end

  def file?
    type == :file
  end

  def dir?
    # to count root as a dir.
    !file?
  end

  # maybe need some options regarding case sensitivity.
  def / name
    @name_lookup[name]
  end

  # index by child name (String) or by struct member (Integer).
  def [] idx
    if String === idx
      #warn 'String form of Dirent#[] is deprecated'
      self / idx
    else
      super
    end
  end

  # move to ruby-msg. and remove from here
  def time
    #warn 'Dirent#time is deprecated'
    create_time || modify_time
  end

  def each_child(&block)
    @children.each(&block) if dir?
  end

  # flattens the tree starting from here into +dirents+. note it modifies its argument.
  def flatten dirents=[]
    @idx = dirents.length
    dirents << self
    if file?
      self.prev = self.next = self.child = EOT
    else
      children.each { |child| child.flatten dirents }
      self.child = Dirent.flatten_helper children
    end
    dirents
  end

  # i think making the tree structure optimized is actually more complex than this, and
  # requires some intelligent ordering of the children based on names, but as long as
  # it is valid its ok.
  # actually, i think its ok. gsf for example only outputs a singly-linked-list, where
  # prev is always EOT.
  def self.flatten_helper children
    return EOT if children.empty?
    i = children.length / 2
    this = children[i]
    this.prev, this.next = [(0...i), (i+1..-1)].map { |r| flatten_helper children[r] }
    this.idx
  end

  # serialize back to the 128 byte on-disk record.
  def to_s
    tmp = Types::Variant.dump(Types::VT_LPWSTR, name)
    tmp = tmp[0, 62] if tmp.length > 62
    tmp += 0.chr * 2
    self.name_len = tmp.length
    self.name_utf16 = tmp + 0.chr * (64 - tmp.length)
    # type_id can perhaps be set in the initializer, as its read only now.
    self.type_id = TYPE_MAP.to_a.find { |id, name| @type == name }.first
    # for the case of files, it is assumed that that was handled already
    # note not dir?, so as not to override root's first_block
    self.first_block = Dirent::EOT if type == :dir
    if file?
      # this is messed up. it changes the time stamps regardless of whether the file
      # was actually touched. instead, any open call with a writeable mode, should update
      # the modify time. create time would be set in new.
      if @ole.params[:update_timestamps]
        self.create_time_str = Types::Variant.dump Types::VT_FILETIME, @create_time
        self.modify_time_str = Types::Variant.dump Types::VT_FILETIME, @modify_time
      end
    else
      self.create_time_str = 0.chr * 8
      self.modify_time_str = 0.chr * 8
    end
    to_a.pack PACK
  end

  # FIX: the previous version of this method was corrupted (an
  # unterminated string literal), making the file unparseable. Restored
  # to the conventional form: name, plus size/mtime/data snippet for files.
  def inspect
    str = "#<Dirent:#{name.inspect}"
    # perhaps i should remove the data snippet. its not that useful anymore.
    if file?
      tmp = read 9
      data = tmp.length == 9 ? tmp[0, 5] + '...' : tmp
      str << " size=#{size}" +
        "#{modify_time ? ' modify_time=' + modify_time.to_s.inspect : nil}" +
        " data=#{data.inspect}"
    end
    str + '>'
  end

  # attach +child+ to this dir, updating parent link and name lookup.
  def << child
    child.parent = self
    @name_lookup[child.name] = child
    @children << child
  end

  # remove the Dirent +child+ from the children array, truncating the data
  # by default.
  def delete child, truncate=true
    # remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone
    unless @children.delete(child)
      raise ArgumentError, "#{child.inspect} not a child of #{self.inspect}"
    end
    @name_lookup.delete(child.name)
    child.parent = nil
    # free our blocks
    child.open { |io| io.truncate 0 } if child.file?
  end

  def self.copy src, dst
    # copies the contents of src to dst. must be the same type. this will throw an
    # error on copying to root. maybe this will recurse too much for big documents??
    raise ArgumentError, 'differing types' if src.file? and !dst.file?
    dst.name = src.name
    if src.dir?
      src.children.each do |src_child|
        dst_child = Dirent.new dst.ole, :type => src_child.type
        dst << dst_child
        Dirent.copy src_child, dst_child
      end
    else
      src.open do |src_io|
        dst.open { |dst_io| IO.copy src_io, dst_io }
      end
    end
  end
end
end
end
ruby-ole-1.2.11.8/lib/ole/support.rb 0000644 0000041 0000041 00000013534 12461443715 017133 0 ustar www-data www-data # encoding: ASCII-8BIT
#
# A file with general support functions used by most files in the project.
#
# These are the only methods added to other classes.
#
require 'logger'
require 'stringio'
require 'enumerator'
class String # :nodoc:
  # Yield successive slices of at most +size+ characters, in order,
  # covering the whole string. Yields nothing for an empty string.
  def each_chunk size
    offset = 0
    while offset < length
      yield self[offset, size]
      offset += size
    end
  end
end
class File # :nodoc:
# for interface consistency with StringIO etc (rather than adding #stat
# to them). used by RangesIO.
# note: modern rubies define File#size already, in which case this
# backport is skipped entirely.
unless File.method_defined?(:size)
def size
stat.size
end
end
end
class Symbol # :nodoc:
# backport of Symbol#to_proc for pre-1.8.7 rubies; skipped when the
# interpreter already provides it.
unless Symbol.method_defined?(:to_proc)
def to_proc
Proc.new { |*args| args.shift.__send__(self, *args) }
end
end
end
module Enumerable # :nodoc:
unless [].respond_to? :group_by
# 1.9 backport
def group_by
hash = Hash.new { |h, key| h[key] = [] }
each { |item| hash[yield(item)] << item }
hash
end
end
unless [].respond_to? :sum
# backport: fold all elements onto +initial+ with +.
def sum initial=0
inject(initial) { |a, b| a + b }
end
end
end
# move to support?
class IO # :nodoc:
  # Stream the remaining contents of the IO-like +src+ into +dst+,
  # reading 4096 byte chunks until +src+ reaches eof.
  def self.copy src, dst
    dst.write src.read(4096) until src.eof?
  end
end
class Logger # :nodoc:
# A helper method for creating a +Logger+ which produce call stack
# in their output
def self.new_with_callstack logdev=STDERR
log = Logger.new logdev
log.level = WARN
log.formatter = proc do |severity, time, progname, msg|
# find where we were called from, in our code
callstack = caller.dup
# drop frames belonging to logger internals, leaving the caller first
callstack.shift while callstack.first =~ /\/logger\.rb:\d+:in/
# reformat "file:line:in `meth'" as "file:line:meth"
from = callstack.first.sub(/:in `(.*?)'/, ":\\1")
"[%s %s]\n%-7s%s\n" % [time.strftime('%H:%M:%S'), from, severity, msg.to_s]
end
log
end
end
# Include this module into a class that defines #each_child. It should
# maybe use #each instead, but its easier to be more specific, and use
# an alias.
#
# I don't want to force the class to cache children (eg where children
# are loaded on request in pst), because that forces the whole tree to
# be loaded. So, the methods should only call #each_child once, and
# breadth first iteration holds its own copy of the children around.
#
# Main methods are #recursive, and #to_tree
module RecursivelyEnumerable # :nodoc:
# yield each descendant, parent before children (pre-order).
def each_recursive_depth_first(&block)
each_child do |child|
yield child
if child.respond_to? :each_recursive_depth_first
child.each_recursive_depth_first(&block)
end
end
end
# don't think this is actually a proper breadth first recursion. only first
# level is breadth first.
def each_recursive_breadth_first(&block)
children = []
each_child do |child|
children << child if child.respond_to? :each_recursive_breadth_first
yield child
end
children.each { |child| child.each_recursive_breadth_first(&block) }
end
# yield self, then all descendants using the named traversal +mode+
# (:depth_first or :breadth_first).
def each_recursive mode=:depth_first, &block
# we always actually yield ourself (the tree root) before recursing
yield self
send "each_recursive_#{mode}", &block
end
# the idea of this function, is to allow use of regular Enumerable methods
# in a recursive fashion. eg:
#
# # just looks at top level children
# root.find { |child| child.some_condition? }
# # recurse into all children getting non-folders, breadth first
# root.recursive(:breadth_first).select { |child| !child.folder? }
# # just get everything
# items = root.recursive.to_a
#
def recursive mode=:depth_first
to_enum(:each_recursive, mode)
end
# streams a "tree" form of the recursively enumerable structure to +io+, or
# return a string form instead if +io+ is not specified.
#
# mostly a debugging aid. can specify a different block which will be called
# to provide the string form for each node.
def to_tree io='', &inspect
inspect ||= :inspect.to_proc
io << "- #{inspect[self]}\n"
# recursive lambda: prints each child one step behind so the last
# child of a node can be drawn with "\\-" instead of "|-".
recurse = proc do |node, prefix|
child = nil
node.each_child do |next_child|
if child
io << "#{prefix}|- #{inspect[child]}\n"
recurse.call child, prefix + '| '
end
child = next_child
end if node.respond_to?(:each_child)
if child
io << "#{prefix}\\- #{inspect[child]}\n"
recurse.call child, prefix + '  '
end
end
recurse.call self, '  '
io
end
end
module Ole
  # Parses unix-style open mode strings ('r', 'w+', 'ab', ...) into a
  # flags bitmask, and answers questions about the resulting access mode.
  class IOMode
    # ruby 1.9 defines binary as 0, which isn't very helpful.
    # its 4 in rubinius. no longer using
    #
    # BINARY = 0x4 unless defined?(BINARY)
    #
    # for that reason, have my own constants module here
    module Constants
      include File::Constants
      BINARY = 0x4
    end
    include Constants

    # flag names used to render #inspect, in display order
    NAMES = %w[rdonly wronly rdwr creat trunc append binary]

    # nabbed from rubinius, and modified.
    # Raises ArgumentError for anything outside the r/w/a grammar with
    # optional '+' and 'b' modifiers.
    def self.parse_mode mode
      ret = 0
      case mode[0, 1]
      when 'r'; ret |= RDONLY
      when 'w'; ret |= WRONLY | CREAT | TRUNC
      when 'a'; ret |= WRONLY | CREAT | APPEND
      else raise ArgumentError, "illegal access mode #{mode}"
      end
      (1...mode.length).each do |i|
        case mode[i, 1]
        when '+'; ret = (ret & ~(RDONLY | WRONLY)) | RDWR
        when 'b'; ret |= BINARY
        else raise ArgumentError, "illegal access mode #{mode}"
        end
      end
      ret
    end

    attr_reader :flags

    # +flags+ is either a mode string (anything responding to #to_str)
    # or an integer bitmask.
    def initialize flags
      flags = self.class.parse_mode flags.to_str if flags.respond_to? :to_str
      # FIX: was +Fixnum === flags+. Fixnum was removed in ruby 3.2,
      # making this raise NameError at runtime; Integer is its ancestor,
      # so this check behaves identically on older interpreters too.
      raise ArgumentError, "invalid flags - #{flags.inspect}" unless Integer === flags
      @flags = flags
    end

    # writeable unless the low access bits are exactly RDONLY
    def writeable?
      #(@flags & RDONLY) == 0
      (@flags & 0x3) != RDONLY
    end

    def readable?
      (@flags & WRONLY) == 0
    end

    def truncate?
      (@flags & TRUNC) != 0
    end

    def append?
      (@flags & APPEND) != 0
    end

    def create?
      (@flags & CREAT) != 0
    end

    def binary?
      (@flags & BINARY) != 0
    end

    # revisit this
    # def apply io
    #   if truncate?
    #     io.truncate 0
    #   elsif append?
    #     io.seek IO::SEEK_END, 0
    #   end
    # end

    # eg #<Ole::IOMode rdonly|binary>. rdonly is special-cased since its
    # flag value is 0 and would never match the bitmask test.
    def inspect
      names = NAMES.map { |name| name if (flags & IOMode.const_get(name.upcase)) != 0 }
      names.unshift 'rdonly' if (flags & 0x3) == 0
      "#<#{self.class} #{names.compact * '|'}>"
    end
  end
end
ruby-ole-1.2.11.8/lib/ole/types.rb 0000644 0000041 0000041 00000000072 12461443715 016554 0 ustar www-data www-data require 'ole/types/base'
require 'ole/types/property_set'
ruby-ole-1.2.11.8/lib/ole/types/ 0000755 0000041 0000041 00000000000 12461443715 016230 5 ustar www-data www-data ruby-ole-1.2.11.8/lib/ole/types/property_set.rb 0000644 0000041 0000041 00000012260 12461443715 021315 0 ustar www-data www-data # encoding: ASCII-8BIT
module Ole
module Types
#
# The PropertySet class currently supports readonly access to the properties
# serialized in "property set" streams, such as the file "\005SummaryInformation",
# in OLE files.
#
# Think it has its roots in MFC property set serialization.
#
# See http://poi.apache.org/hpsf/internals.html for details
#
class PropertySet
HEADER_SIZE = 28
# header layout: signature (v), unknown (v), os id (V), guid, num sections (V)
HEADER_PACK = "vvVa#{Clsid::SIZE}V"
OS_MAP = {
0 => :win16,
1 => :mac,
2 => :win32,
0x20001 => :ooffice, # open office on linux...
}
# define a smattering of the property set guids.
DATA = {
Clsid.parse('{f29f85e0-4ff9-1068-ab91-08002b27b3d9}') => ['FMTID_SummaryInformation', {
2 => 'doc_title',
3 => 'doc_subject',
4 => 'doc_author',
5 => 'doc_keywords',
6 => 'doc_comments',
7 => 'doc_template',
8 => 'doc_last_author',
9 => 'doc_rev_number',
10 => 'doc_edit_time',
11 => 'doc_last_printed',
12 => 'doc_created_time',
13 => 'doc_last_saved_time',
14 => 'doc_page_count',
15 => 'doc_word_count',
16 => 'doc_char_count',
18 => 'doc_app_name',
19 => 'security'
}],
Clsid.parse('{d5cdd502-2e9c-101b-9397-08002b2cf9ae}') => ['FMTID_DocSummaryInfo', {
2 => 'doc_category',
3 => 'doc_presentation_target',
4 => 'doc_byte_count',
5 => 'doc_line_count',
6 => 'doc_para_count',
7 => 'doc_slide_count',
8 => 'doc_note_count',
9 => 'doc_hidden_count',
10 => 'mmclips',
11 => 'scale_crop',
12 => 'heading_pairs',
13 => 'doc_part_titles',
14 => 'doc_manager',
15 => 'doc_company',
16 => 'links_up_to_date'
}],
Clsid.parse('{d5cdd505-2e9c-101b-9397-08002b2cf9ae}') => ['FMTID_UserDefinedProperties', {}]
}
# create an inverted map of names to guid/key pairs
PROPERTY_MAP = DATA.inject({}) do |h1, (guid, data)|
data[1].inject(h1) { |h2, (id, name)| h2.update name => [guid, id] }
end
# exposes each named format guid (eg FMTID_SummaryInformation) as a constant
module Constants
DATA.each { |guid, (name, _)| const_set name, guid }
end
include Constants
include Enumerable
# One section of the property set: a format guid plus the offset of its
# property table within the stream.
class Section
include Variant::Constants
include Enumerable
SIZE = Clsid::SIZE + 4
PACK = "a#{Clsid::SIZE}v"
attr_accessor :guid, :offset
attr_reader :length
# +str+ is a raw SIZE byte section-list record; +property_set+ is the
# owning PropertySet, used for io access.
def initialize str, property_set
@property_set = property_set
@guid, @offset = str.unpack PACK
self.guid = Clsid.load guid
load_header
end
# the underlying property set stream; all reads seek it directly.
def io
@property_set.io
end
# read this section's byte size and property count from its offset.
def load_header
io.seek offset
@byte_size, @length = io.read(8).unpack 'V2'
end
# value of property id +key+, or nil if not present in this section.
def [] key
each_raw do |id, property_offset|
return read_property(property_offset).last if key == id
end
nil
end
def []= key, value
raise NotImplementedError, 'section writes not yet implemented'
end
# yields [id, value] for each property in the section.
def each
each_raw do |id, property_offset|
yield id, read_property(property_offset).last
end
end
private
# yields raw [id, offset] pairs from the section's property table.
def each_raw
io.seek offset + 8
io.read(length * 8).each_chunk(8) { |str| yield(*str.unpack('V2')) }
end
# read the [type, value] stored at +property_offset+ within the section.
def read_property property_offset
io.seek offset + property_offset
type, value = io.read(8).unpack('V2')
# is the method of serialization here custom?
case type
when VT_LPSTR, VT_LPWSTR
value = Variant.load type, io.read(value)
# ....
end
[type, value]
end
end
attr_reader :io, :signature, :unknown, :os, :guid, :sections
# +io+ is a seekable stream positioned at the start of a serialized
# property set (eg the "\005SummaryInformation" stream).
def initialize io
@io = io
load_header io.read(HEADER_SIZE)
load_section_list io.read(@num_sections * Section::SIZE)
# expect no gap between last section and start of data.
#Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min
end
# parse the HEADER_SIZE byte header into signature/os/guid/section count.
def load_header str
@signature, @unknown, @os_id, @guid, @num_sections = str.unpack HEADER_PACK
# should i check that unknown == 0? it usually is. so is the guid actually
@guid = Clsid.load @guid
@os = OS_MAP[@os_id] || Log.warn("unknown operating system id #{@os_id}")
end
# parse the section list that immediately follows the header.
def load_section_list str
@sections = str.to_enum(:each_chunk, Section::SIZE).map { |s| Section.new s, self }
end
# look up a property by symbolic name (see PROPERTY_MAP); nil if the
# name or its section is absent.
def [] key
pair = PROPERTY_MAP[key.to_s] or return nil
section = @sections.find { |s| s.guid == pair.first } or return nil
section[pair.last]
end
def []= key, value
pair = PROPERTY_MAP[key.to_s] or return nil
section = @sections.find { |s| s.guid == pair.first } or return nil
section[pair.last] = value
end
# ghost accessors for all PROPERTY_MAP names (eg #doc_title).
# NOTE(review): no matching respond_to_missing?, so #respond_to? won't
# report these methods - consider adding it.
def method_missing name, *args, &block
if name.to_s =~ /(.*)=$/
return super unless args.length == 1
return super unless PROPERTY_MAP[$1]
self[$1] = args.first
else
return super unless args.length == 0
return super unless PROPERTY_MAP[name.to_s]
self[name]
end
end
# yields [name, value] for every known (named) property across sections.
def each
@sections.each do |section|
next unless pair = DATA[section.guid]
map = pair.last
section.each do |id, value|
name = map[id] or next
yield name, value
end
end
end
# all known properties as a symbol-keyed hash.
def to_h
inject({}) { |hash, (name, value)| hash.update name.to_sym => value }
end
end
end
end
ruby-ole-1.2.11.8/lib/ole/types/base.rb 0000644 0000041 0000041 00000020344 12461443715 017472 0 ustar www-data www-data # encoding: ASCII-8BIT
require 'date'
require 'ole/base'
module Ole # :nodoc:
#
# The Types module contains all the serialization and deserialization code for standard ole
# types.
#
# It also defines all the variant type constants, and symbolic names.
#
module Types
# for anything that we don't have serialization code for
class Data < String
def self.load str
new str
end
def self.dump str
str.to_s
end
end
class Lpstr < String
  class << self
    # Deserialize: drop a single trailing null byte if present.
    # not sure if its always there, but there often is one.
    def load str
      new str.chomp("\0")
    end

    # Serialize. do i need to append the null byte?
    def dump str
      str.to_s
    end
  end
end
# pick the utf-16le codec implementation: String#encode on 1.9+, the
# iconv library on older rubies. both branches define the same Lpwstr api.
if ''.respond_to? :encode
# NOTE: only here in the interim to preserve behaviour of
# FROM/TO_UTF16 constants for ruby-msg.
class Iconv # :nodoc:
def initialize(to, from)
@to, @from = to, from
end
# minimal String#encode-based stand-in for the old Iconv interface.
def iconv(str)
str.encode(@to, @from)
end
end
# for VT_LPWSTR
class Lpwstr < String
FROM_UTF16 = Iconv.new 'utf-8', 'utf-16le'
TO_UTF16 = Iconv.new 'utf-16le', 'utf-8'
# decode utf-16le bytes to utf-8, dropping one trailing null if present.
def self.load str
new str.encode(Encoding::UTF_8, Encoding::UTF_16LE).chomp(0.chr)
end
def self.dump str
# need to append nulls?
data = str.encode(Encoding::UTF_16LE)
# not sure if this is the recommended way to do it, but I want to treat
# the resulting utf16 data as regular bytes, not characters.
data.force_encoding Encoding::ASCII_8BIT
data
end
end
else
require 'iconv'
# for VT_LPWSTR
class Lpwstr < String
FROM_UTF16 = Iconv.new 'utf-8', 'utf-16le'
TO_UTF16 = Iconv.new 'utf-16le', 'utf-8'
def self.load str
new FROM_UTF16.iconv(str).chomp(0.chr)
end
def self.dump str
# need to append nulls?
TO_UTF16.iconv str
end
end
end
# for VT_FILETIME
class FileTime < DateTime
SIZE = 8
# DateTime.new is slow... faster version for FileTime
def self.new year, month, day, hour=0, min=0, sec=0
# DateTime will remove leap and leap-leap seconds
sec = 59 if sec > 59
# standard julian day number calculation (gregorian calendar)
if month <= 2
month += 12
year -= 1
end
y = year + 4800
m = month - 3
jd = day + (153 * m + 2).div(5) + 365 * y + y.div(4) - y.div(100) + y.div(400) - 32045
fr = hour / 24.0 + min / 1440.0 + sec / 86400.0
# new! was actually new0 in older versions of ruby (<=1.8.4?)
# see issue #4.
msg = respond_to?(:new!) ? :new! : :new0
send msg, jd + fr - 0.5, 0, ITALY
end if respond_to?(:new!) || respond_to?(:new0)
# build a FileTime from a Time's local y/m/d h:m:s fields.
def self.from_time time
new(*time.to_a[0, 6].reverse)
end
def self.now
from_time Time.now
end
# the FILETIME epoch, 1st january 1601.
EPOCH = new 1601, 1, 1
#def initialize year, month, day, hour, min, sec
# Create a +DateTime+ object from a struct +FILETIME+
# (http://msdn2.microsoft.com/en-us/library/ms724284.aspx).
#
# Converts +str+ to two 32 bit time values, comprising the high and low 32 bits of
# the 100's of nanoseconds since 1st january 1601 (Epoch).
def self.load str
low, high = str.to_s.unpack 'V2'
# we ignore these, without even warning about it
return nil if low == 0 and high == 0
# the + 0.00001 here stinks a bit...
seconds = (high * (1 << 32) + low) / 1e7 + 0.00001
obj = EPOCH + seconds / 86400 rescue return
# work around home_run not preserving derived class
obj = new! obj.jd + obj.day_fraction - 0.5, 0, ITALY unless FileTime === obj
obj
end
# +time+ should be able to be either a Time, Date, or DateTime.
def self.dump time
return 0.chr * SIZE unless time
# convert whatever is given to be a datetime, to handle the large range
case time
when Date # this includes DateTime & FileTime
when Time
time = from_time time
else
raise ArgumentError, 'unknown time argument - %p' % [time]
end
# round to milliseconds (throwing away nanosecond precision) to
# compensate for using Float-based DateTime
# (days * 864000000 gives milliseconds-of-a-sort; * 1000 scales to the
# 100ns FILETIME unit: 86400e7 units per day total)
nanoseconds = ((time - EPOCH).to_f * 864000000).round * 1000
high, low = nanoseconds.divmod 1 << 32
[low, high].pack 'V2'
end
def inspect
"#<#{self.class} #{to_s}>"
end
end
# for VT_CLSID
# Unlike most of the other conversions, the Guid's are serialized/deserialized by actually
# doing nothing! (eg, _load & _dump are null ops)
# Rather, its just a string with a different inspect string, and it includes a
# helper method for creating a Guid from that readable form (#format).
class Clsid < String
	SIZE = 16
	PACK = 'V v v CC C6'
	# wrap the raw 16 bytes without any conversion
	def self.load str
		new str.to_s
	end
	# serialization is a no-op too. nil maps to SIZE null bytes, and a
	# human-readable guid (anything containing a '-') is parsed first,
	# allowing plain strings to be used in place of guids.
	def self.dump guid
		return 0.chr * SIZE unless guid
		guid['-'] ? parse(guid) : guid
	end
	# build a Clsid from its readable "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
	# form (optionally brace-wrapped), raising ArgumentError when the input
	# doesn't round-trip back through #format.
	def self.parse str
		parts = str.scan(/[a-f\d]+/i).map(&:hex)
		if parts.length == 5
			# this is pretty ugly - split the last two groups back into bytes
			packed = parts[0, 3]
			packed << ('%04x' % parts[3]).scan(/../).map(&:hex)
			packed << ('%012x' % parts[4]).scan(/../).map(&:hex)
			guid = new packed.flatten.pack(PACK)
			return guid if guid.format.delete('{}') == str.downcase.delete('{}')
		end
		raise ArgumentError, 'invalid guid - %p' % str
	end
	# the canonical lowercase readable form
	def format
		"%08x-%04x-%04x-%02x%02x-#{'%02x' * 6}" % unpack(PACK)
	end
	def inspect
		"#<#{self.class}:{#{format}}>"
	end
end
#
# The OLE variant types, extracted from
# http://www.marin.clara.net/COM/variant_type_definitions.htm.
#
# A subset is also in WIN32OLE::VARIANT, but it's not cross platform (obviously).
#
# Use like:
#
# p Ole::Types::Variant::NAMES[0x001f] => 'VT_LPWSTR'
# p Ole::Types::VT_DATE # => 7
#
# The serialization / deserialization functions should be fixed to make it easier
# to work with. like
#
# Ole::Types.from_str(VT_DATE, data) # and
# Ole::Types.to_str(VT_DATE, data)
#
# Or similar, rather than having to do VT_* <=> ad hoc class name etc as it is
# currently.
#
module Variant
# numeric variant type => display name, as used by the property set and
# variant serialization code.
NAMES = {
0x0000 => 'VT_EMPTY',
0x0001 => 'VT_NULL',
0x0002 => 'VT_I2',
0x0003 => 'VT_I4',
0x0004 => 'VT_R4',
0x0005 => 'VT_R8',
0x0006 => 'VT_CY',
0x0007 => 'VT_DATE',
0x0008 => 'VT_BSTR',
0x0009 => 'VT_DISPATCH',
0x000a => 'VT_ERROR',
0x000b => 'VT_BOOL',
0x000c => 'VT_VARIANT',
0x000d => 'VT_UNKNOWN',
0x000e => 'VT_DECIMAL',
0x0010 => 'VT_I1',
0x0011 => 'VT_UI1',
0x0012 => 'VT_UI2',
0x0013 => 'VT_UI4',
0x0014 => 'VT_I8',
0x0015 => 'VT_UI8',
0x0016 => 'VT_INT',
0x0017 => 'VT_UINT',
0x0018 => 'VT_VOID',
0x0019 => 'VT_HRESULT',
0x001a => 'VT_PTR',
0x001b => 'VT_SAFEARRAY',
0x001c => 'VT_CARRAY',
0x001d => 'VT_USERDEFINED',
0x001e => 'VT_LPSTR',
0x001f => 'VT_LPWSTR',
0x0040 => 'VT_FILETIME',
0x0041 => 'VT_BLOB',
0x0042 => 'VT_STREAM',
0x0043 => 'VT_STORAGE',
0x0044 => 'VT_STREAMED_OBJECT',
0x0045 => 'VT_STORED_OBJECT',
0x0046 => 'VT_BLOB_OBJECT',
0x0047 => 'VT_CF',
0x0048 => 'VT_CLSID',
0x0fff => 'VT_ILLEGALMASKED',
0x1000 => 'VT_VECTOR',
0x2000 => 'VT_ARRAY',
0x4000 => 'VT_BYREF',
0x8000 => 'VT_RESERVED',
0xffff => 'VT_ILLEGAL'
}
# variant type name => the class implementing its wire format; anything not
# listed here falls back to the generic Data handler in load/dump below.
CLASS_MAP = {
# haven't seen one of these. wonder if its same as FILETIME?
#'VT_DATE' => ?,
'VT_LPSTR' => Lpstr,
'VT_LPWSTR' => Lpwstr,
'VT_FILETIME' => FileTime,
'VT_CLSID' => Clsid
}
# defines a VT_* constant for every entry in NAMES, for inclusion into
# Ole::Types.
module Constants
NAMES.each { |num, name| const_set name, num }
# VT_TYPEMASK has the same value as VT_ILLEGALMASKED. Keep the latter in the
# NAMES hash so that it will be used when mapping a concrete type to display
# string, but still define this constant here for other uses
VT_TYPEMASK = 0x0fff
end
# deserialize +str+ according to the numeric variant +type+.
# raises ArgumentError when +type+ is not in NAMES.
def self.load type, str
type = NAMES[type] or raise ArgumentError, 'unknown ole type - 0x%04x' % type
(CLASS_MAP[type] || Data).load str
end
# serialize +variant+ according to the numeric variant +type+.
# raises ArgumentError when +type+ is not in NAMES.
def self.dump type, variant
type = NAMES[type] or raise ArgumentError, 'unknown ole type - 0x%04x' % type
(CLASS_MAP[type] || Data).dump variant
end
end
include Variant::Constants
# Deprecated aliases below, kept mostly for the benefit of ruby-msg, until
# a new version is released.
# shortcut for Variant.load VT_CLSID - returns a Clsid wrapping the raw bytes
def self.load_guid str
Variant.load VT_CLSID, str
end
# shortcut for Variant.load VT_FILETIME - returns a FileTime, or nil for an
# all-zero value
def self.load_time str
Variant.load VT_FILETIME, str
end
# re-exported converter shims (see Lpwstr), also for ruby-msg's benefit
FROM_UTF16 = Lpwstr::FROM_UTF16
TO_UTF16 = Lpwstr::TO_UTF16
end
end
ruby-ole-1.2.11.8/lib/ole/ranges_io.rb 0000644 0000041 0000041 00000015765 12461443715 017375 0 ustar www-data www-data # encoding: ASCII-8BIT
# need Ole::IOMode
require 'ole/support'
#
# = Introduction
#
# +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
# slices of the input file by providing a list of ranges. Intended as an initial measure to curb
# inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
# no method to stream it.
#
# This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
# and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
# getting 16 bytes doesn't read the whole thing).
#
# In the simplest case it can be used with a single range to provide a limited io to a section of
# a file.
#
# = Limitations
#
# * No buffering. by design at the moment. Intended for large reads
#
# = TODO
#
# On further reflection, this class is something of a joining/optimization of
# two separate IO classes. a SubfileIO, for providing access to a range within
# a File as a separate IO object, and a ConcatIO, allowing the presentation of
# a bunch of io objects as a single unified whole.
#
# I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
# convert a whole mime message into an IO stream, that can be read from.
# It will just be the concatenation of a series of IO objects, corresponding to
# headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
# original message proper, or RangesIO as provided by the Attachment#data, that
# will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
# fly. Thus the attachment, in its plain or encoded form, and the message as a
# whole never exists as a single string in memory, as it does now. This is a
# fair bit of work to achieve, but generally useful I believe.
#
# This class isn't ole specific, maybe move it to my general ruby stream project.
#
class RangesIO
	attr_reader :io, :mode, :ranges, :size, :pos

	# +io+:: the parent io object that we are wrapping.
	# +mode+:: the mode to use
	# +params+:: hash of params.
	# * :ranges - byte offsets, either:
	#   1. an array of ranges [1..2, 4..5, 6..8] or
	#   2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
	#      (think the way String indexing works)
	# * :close_parent - boolean to close parent when this object is closed
	#
	# NOTE: the +ranges+ can overlap.
	def initialize io, mode='r', params={}
		mode, params = 'r', mode if Hash === mode
		ranges = params[:ranges]
		@params = {:close_parent => false}.merge params
		@mode = Ole::IOMode.new mode
		@io = io
		# initial position in the file
		@pos = 0
		self.ranges = ranges || [[0, io.size]]
		# handle some mode flags
		truncate 0 if @mode.truncate?
		seek size if @mode.append?
	end

	# block form constructor, as per IO.open. TODO add test for this
	def self.open(*args, &block)
		ranges_io = new(*args)
		if block_given?
			begin; yield ranges_io
			ensure; ranges_io.close
			end
		else
			ranges_io
		end
	end

	# Install a new list of +ranges+ (see #initialize for the accepted forms),
	# recalculating cumulative offsets and total size, and re-seeking so the
	# active-range bookkeeping stays consistent with the current position.
	def ranges= ranges
		# convert ranges to arrays. check for negative ranges?
		ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
		# combine adjacent ranges into one to reduce seeking; can be disabled
		# with :combine => false, which might be useful for debugging
		if @params[:combine] == false
			@ranges = ranges
		else
			@ranges = []
			next_pos = nil
			ranges.each do |pos, len|
				if next_pos == pos
					@ranges.last[1] += len
					next_pos += len
				else
					@ranges << [pos, len]
					next_pos = pos + len
				end
			end
		end
		# calculate cumulative offsets from range sizes
		@size = 0
		@offsets = []
		@ranges.each do |pos, len|
			@offsets << @size
			@size += len
		end
		self.pos = @pos
	end

	# Seek to +pos+, interpreted according to +whence+ as per IO#seek.
	# Raises Errno::EINVAL when the resulting position falls outside 0..size.
	def pos= pos, whence=IO::SEEK_SET
		case whence
		when IO::SEEK_SET
		when IO::SEEK_CUR
			pos += @pos
		when IO::SEEK_END
			pos = @size + pos
		else raise Errno::EINVAL
		end
		raise Errno::EINVAL unless (0..@size) === pos
		@pos = pos
		# do a binary search through @offsets to find the active range.
		a, c, b = 0, 0, @offsets.length
		while a < b
			c = (a + b).div(2)
			pivot = @offsets[c]
			if pos == pivot
				@active = c
				return
			elsif pos < pivot
				b = c
			else
				a = c + 1
			end
		end
		@active = a - 1
	end

	alias seek :pos=
	alias tell :pos

	def rewind
		seek 0
	end

	def close
		@io.close if @params[:close_parent]
	end

	def eof?
		@pos == @size
	end

	# read bytes from file, to a maximum of +limit+, or all available if unspecified.
	def read limit=nil
		data = ''
		return data if eof?
		limit ||= size
		pos, len = @ranges[@active]
		diff = @pos - @offsets[@active]
		pos += diff
		len -= diff
		loop do
			@io.seek pos
			if limit < len
				s = @io.read(limit).to_s
				@pos += s.length
				data << s
				break
			end
			s = @io.read(len).to_s
			@pos += s.length
			data << s
			# parent io returned fewer bytes than expected - stop rather than loop
			break if s.length != len
			limit -= len
			break if @active == @ranges.length - 1
			@active += 1
			pos, len = @ranges[@active]
		end
		data
	end

	# you may override this call to update @ranges and @size, if applicable.
	def truncate size
		raise NotImplementedError, 'truncate not supported'
	end

	# using explicit forward instead of an alias now for overriding.
	# should override truncate.
	def size= size
		truncate size
	end

	# Write +data+ at the current position, spanning range boundaries as
	# needed, and return the number of bytes written. When there is not
	# enough room, the #truncate hook is used to try to grow; IOError is
	# raised if the subclass doesn't support that.
	def write data
		# duplicates object to avoid side effects for the caller, but do so only if
		# encoding isn't already ASCII-8BIT (slight optimization)
		if data.respond_to?(:encoding) and data.encoding != Encoding::ASCII_8BIT
			data = data.dup.force_encoding(Encoding::ASCII_8BIT)
		end
		return 0 if data.empty?
		data_pos = 0
		# if we don't have room, we can use the truncate hook to make more space.
		if data.length > @size - @pos
			begin
				truncate @pos + data.length
			rescue NotImplementedError
				raise IOError, "unable to grow #{inspect} to write #{data.length} bytes"
			end
		end
		pos, len = @ranges[@active]
		diff = @pos - @offsets[@active]
		pos += diff
		len -= diff
		loop do
			@io.seek pos
			if data_pos + len > data.length
				chunk = data[data_pos..-1]
				@io.write chunk
				@pos += chunk.length
				data_pos = data.length
				break
			end
			@io.write data[data_pos, len]
			@pos += len
			data_pos += len
			break if @active == @ranges.length - 1
			@active += 1
			pos, len = @ranges[@active]
		end
		data_pos
	end
	alias << write

	# Read a single line, roughly as per IO#gets.
	#
	# FIX: previously this raised NoMethodError (nil + 1) whenever the next
	# 1024-byte chunk contained no newline, including at EOF. Now returns
	# nil at EOF and the whole chunk when it has no newline, matching
	# IO#gets more closely.
	# FIXME: lines longer than the 1024 byte chunk are still truncated;
	# could wrap this in a buffered io stream that provides a proper gets
	# and appropriately handles pos and truncate.
	def gets
		s = read 1024
		return nil if s.empty?
		i = s.index "\n"
		return s unless i
		# push back the bytes after the newline
		self.pos -= s.length - (i + 1)
		s[0..i]
	end
	alias readline :gets

	def inspect
		"#<#{self.class} io=#{io.inspect}, size=#{@size}, pos=#{@pos}>"
	end
end
# this subclass of ranges io explicitly ignores the truncate part of 'w' modes.
# only really needed for the allocation table writes etc. maybe just use explicit modes
# for those
# better yet write a test that breaks before I fix it. added nodoc for the
# time being.
class RangesIONonResizeable < RangesIO # :nodoc:
	# Same as RangesIO, except any truncate (IO::TRUNC) bit is quietly
	# stripped from the requested mode before it is handed on.
	def initialize io, mode='r', params={}
		if Hash === mode
			mode, params = 'r', mode
		end
		stripped = Ole::IOMode.new(mode).flags & ~IO::TRUNC
		super io, stripped, params
	end
end
ruby-ole-1.2.11.8/lib/ole/file_system.rb 0000644 0000041 0000041 00000000233 12461443715 017732 0 ustar www-data www-data warn <<-end
Use of ole/file_system is deprecated. Use ole/storage (the file_system api
is recommended and enabled by default).
end
require 'ole/storage'
ruby-ole-1.2.11.8/lib/ole/base.rb 0000644 0000041 0000041 00000000211 12461443715 016315 0 ustar www-data www-data # encoding: ASCII-8BIT
require 'ole/support'
require 'ole/storage/version'
module Ole # :nodoc:
# library-wide logger. Logger.new_with_callstack is an extension added by
# ole/support - presumably it formats messages with caller information;
# TODO confirm against lib/ole/support.rb
Log = Logger.new_with_callstack
end
ruby-ole-1.2.11.8/lib/ole/storage.rb 0000644 0000041 0000041 00000000135 12461443715 017054 0 ustar www-data www-data require 'ole/storage/base'
require 'ole/storage/file_system'
require 'ole/storage/meta_data'
ruby-ole-1.2.11.8/metadata.yml 0000644 0000041 0000041 00000003640 12461443715 016045 0 ustar www-data www-data --- !ruby/object:Gem::Specification
name: ruby-ole
version: !ruby/object:Gem::Version
version: 1.2.11.8
platform: ruby
authors:
- Charles Lowe
autorequire:
bindir: bin
cert_chain: []
date: 2014-12-30 00:00:00.000000000 Z
dependencies: []
description: A library for easy read/write access to OLE compound documents for Ruby.
email: aquasync@gmail.com
executables:
- oletool
extensions: []
extra_rdoc_files:
- README
- ChangeLog
files:
- README
- COPYING
- Rakefile
- ChangeLog
- ruby-ole.gemspec
- bin/oletool
- lib/ole/base.rb
- lib/ole/ranges_io.rb
- lib/ole/types/base.rb
- lib/ole/types/property_set.rb
- lib/ole/types.rb
- lib/ole/support.rb
- lib/ole/storage/version.rb
- lib/ole/storage/base.rb
- lib/ole/storage/file_system.rb
- lib/ole/storage/meta_data.rb
- lib/ole/storage.rb
- lib/ole/file_system.rb
- test/test_ranges_io.rb
- test/test_storage.rb
- test/test_filesystem.rb
- test/test_meta_data.rb
- test/test_property_set.rb
- test/test_mbat.rb
- test/test_types.rb
- test/test_support.rb
- test/test_word_95.doc
- test/test.doc
- test/test_word_6.doc
- test/test_word_97.doc
- test/oleWithDirs.ole
- test/test_SummaryInformation
homepage: http://code.google.com/p/ruby-ole
licenses: []
metadata: {}
post_install_message:
rdoc_options:
- --main
- README
- --title
- ruby-ole documentation
- --tab-width
- '2'
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
requirements:
- - '>='
- !ruby/object:Gem::Version
version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
requirements:
- - '>='
- !ruby/object:Gem::Version
version: '0'
requirements: []
rubyforge_project: ruby-ole
rubygems_version: 2.0.14
signing_key:
specification_version: 4
summary: Ruby OLE library.
test_files:
- test/test_ranges_io.rb
- test/test_storage.rb
- test/test_filesystem.rb
- test/test_meta_data.rb
- test/test_property_set.rb
- test/test_mbat.rb
- test/test_types.rb
- test/test_support.rb
ruby-ole-1.2.11.8/test/ 0000755 0000041 0000041 00000000000 12461443715 014516 5 ustar www-data www-data ruby-ole-1.2.11.8/test/test_word_95.doc 0000644 0000041 0000041 00000170000 12461443715 017532 0 ustar www-data www-data ÐÏࡱá ; þÿ u þÿÿÿ ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýÿÿÿÿÿÿÿþÿÿÿp
! " # $ % &