2010年8月28日土曜日

Clojureでバイナリファイルを読み込む

実践Common Lisp(Practical Common Lisp)のバイナリファイルのパースっぽいものを書こうとしてみました。

(defmulti read-binary-class (fn [class in] class))
(defmulti read-binary-raw (fn [class in] class))

(def *class-list* (atom []))
(def *direct-super-classes-map* (atom {}))

(defn defined-class-p [sym]
(some #(= sym %1) @*class-list*))

;; Symbol like <xxx> means forward declaration
(defn forward-declaration-symbol-p [sym]
(let [tmp (str sym)
len (count tmp)]
(if (<= len 2)
false
(and (= \< (first tmp)) (= \> (last tmp))))))

(defn deref-forward-declaration-symbol [sym map]
`(get ~map '~(symbol (subs (str sym)
1
(dec (count (str sym)))))))

(defn deref-forward-declaration-symbol-recur [obj m]
(cond
(symbol? obj)
(if (forward-declaration-symbol-p obj)
(deref-forward-declaration-symbol obj m)
obj)
(list? obj) (map
#(deref-forward-declaration-symbol-recur %1 m)
obj)
(number? obj) obj
true obj))

(defn make-binary-object
([name]
{:binary-class-name name})
([name map]
(assoc map :binary-class-name name )))

(defn get-direct-super-classes [name]
(get @*direct-super-classes-map* name))

(defmacro with-map [map bind & body]
`(let [~bind ~map]
~@body))

(defn expand-clause-for-reader [clause in]
(if (list? (second clause))
;; (second clause) = (:list class expr)
(if (= (first (second clause)) :list)
;; (:list class expr) expr -> length
(let [[_ class expr] (second clause)
m (gensym "map")]
`(with-map ~m
(assoc ~m
'~(first clause)
(doall
(for [_# (range ~(deref-forward-declaration-symbol-recur expr m))]
~(if (defined-class-p class)
`(~'read-binary-class '~class ~in)
`(~'read-binary-raw '~class ~in)))))))
;; return class name (symbol) expr
(let [m (gensym "map")]
`(with-map ~m
(assoc ~m
'~(first clause)
(let [sym# ~(deref-forward-declaration-symbol-recur (second clause) m)]
(if (~'defined-class-p sym#)
(~'read-binary-class sym# ~in)
(~'read-binary-raw sym# ~in)))))))
`(assoc '~(first clause)
~(if (defined-class-p (second clause))
`(~'read-binary-class '~(second clause) ~in)
`(~'read-binary-raw '~(second clause) ~in)))))



(defn expand-clauses-for-reader [m clauses in]
`(-> ~m
~@(map (fn [clause]
(expand-clause-for-reader clause in))
clauses)))

(defn expand-read-binary-body [name in clauses]
(let [var (gensym "var")
class (gensym "class")
tmp (gensym "tmp")]
`(let [super-classes# (get-direct-super-classes '~name)
~tmp (apply merge
(map
(fn [super#]
(~'read-binary-class super# ~in))
super-classes#))]
~(expand-clauses-for-reader tmp clauses in))))

(defmacro def-binary-raw [name reader]
`(do
(defmethod read-binary-raw '~name ~@reader)))

;; clause = (name class)
(defmacro def-binary-class [name [& supers] & clauses]
(let [obj (gensym "obj")
in (gensym "in")
out (gensym "out")
classname (gensym "classname")]
`(do
(swap! *class-list* conj '~name)
(swap! *direct-super-classes-map* assoc '~name '~supers)
(defmethod read-binary-class '~name [~classname ~in]
~(expand-read-binary-body name in clauses)))))
;;; examples

(def-binary-raw u1
;; reader
([type in]
(bit-and 255 (.read in))))

;; big endian
(def-binary-raw u2
;; reader
([type in]
(+ (bit-shift-left (bit-and 255 (.read in)) 8)
(bit-and 255 (.read in)))))

(def-binary-raw u4
;; reader
([type in]
(+ (bit-shift-left (bit-and 255 (.read in)) 24)
(bit-shift-left (bit-and 255 (.read in)) 16)
(bit-shift-left (bit-and 255 (.read in)) 8)
(bit-and 255 (.read in)))))

;;; java class file format
;; tagはcp-infoが保持し、cp-infoのinfo部にtagに応じた
;; クラスの値が入るようにする

(def-binary-class constant-class-info []
(name-index u2))
(def-binary-class constant-fieldref-info []
(class-index u2)
(name-and-type-index u2))

(def-binary-class constant-methodref-info []
(class-index u2)
(name-and-type-index u2))
(def-binary-class constant-interface-methodref-info []
(class-index u2)
(name-and-type-index u2))
(def-binary-class constant-string-info []
(string-index u2))

(def-binary-class constant-integer-info []
(bytes u4))
(def-binary-class constant-float-info []
(bytes u4))

(def-binary-class constant-long-info []
(high-bytes u4)
(low-bytes u4))
(def-binary-class constant-double-info []
(high-bytes u4)
(low-bytes u4))

(def-binary-class constant-name-and-type-info []
(name-index u2)
(descriptor-index u2))

(def-binary-class constant-utf8-info []
(length u2)
(bytes (:list u1 <length>)))


(def-binary-class cp-info []
(tag u1)
(info (case <tag>
7 'constant-class-info
9 'constant-fieldref-info
10 'constant-methodref-info
11 'constant-interface-methodref-info
8 'constant-string-info
4 'constant-integer-info
3 'constant-float-info
5 'constant-long-info
6 'constant-double-info
12 'constant-name-and-type-info
1 'constant-utf8-info)))

(def-binary-class attribute-info []
(attribute-name-index u2)
(attribute-length u4)
(info (:list u1 <attribute-length>)))

(def-binary-class method-info []
(access-flag u2)
(name-index u2)
(descriptor-index u2)
(attributes-count u2)
(attributes (:list attribute-info <attributes-count>)))

(def-binary-class field-info []
(access-flags u2)
(name-index u2)
(descriptor-index u2)
(attributes-count u2)
(attributes (:list attribute-info <attributes-count>)))


(def-binary-class jvm-class-file []
(magic u4)
(minor-version u2)
(major-version u2)
(constant-pool-count u2)
(constant-pool (:list cp-info (- <constant-pool-count> 1)))
(access-flags u2)
(this-class u2)
(super-class u2)
(interfaces-count u2)
(interfaces (:list u2 <interfaces-count>))
(fields-count u2)
(fields (:list field-info <fields-count>))
(methods-count u2)
(methods (:list method-info <methods-count>))
(attributes-count u2)
(attributes (:list attribute-info <attributes-count>)))


(defn test-read-binary-class [class fname]
(with-open [in (java.io.FileInputStream. fname)]
(read-binary-class class in)))

(use 'clojure.contrib.trace)
(defn test-read-binary-class-with-trace [class fname]
(dotrace [read-binary-class read-binary-raw]
(with-open [in (java.io.FileInputStream. fname)]
(read-binary-class class in))))

0 件のコメント:

コメントを投稿