4,887,235 493 494 (assign byte-r (- b-temp bb-s-bitpos)) (store-word (ldb bb-s-word byte-s byte-r memory-data))) (assign bb-s-word (rotate bb-s-word byte-r)) (assign bb-s-bitpos (- bb-s-bitpos b-temp)) (parallel (decr-d-offset) (lisp (trace-path #/d)) (jump ubitblt-d-aligned-row-source-backwards))) (sequential ;s < d, need to fetch another word (parallel-with-s-access bb-s-offset (assign byte-r (- b-temp bb-s-bitpos)) (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r)))) (decr-wrap-s-offset) (parallel-with-s-access bb-s-offset (assign a-temp (- b-temp bb-s-bitpos)) (assign byte-s (1- a-temp)) (assign bb-s-word2 (logxor bb-constant memory-data))) (assign bb-s-word (ldb bb-s-word2 byte-s byte-r bb-s-word)) (parallel-with-d-access bb-d-offset (assign byte-r (b-constant 0)) (assign byte-s (1- bb-d-bitpos)) (store-word (ldb bb-s-word byte-s byte-r memory-data))) (assign bb-width (- bb-width bb-d-bitpos)) (assign bb-s-bitpos (32- a-temp)) (assign byte-r a-temp) (assign bb-s-word (rotate bb-s-word2 byte-r)) (parallel (decr-d-offset) (lisp (trace-path #/e)) (jump ubitblt-d-aligned-row-source-backwards)))))))) (defucode ubitblt-aligned-row-source-backwards ;8 cycles per word (parallel ;1 (assign bb-width (- bb-width (a-constant 32.))) (trap-if (minus-fixnum obus) ubitblt-aligned-row-source-backwards-done)) (decr-wrap-s-offset) ;1 (parallel-with-s-access ;3 bb-s-offset (assign bb-s-word (logxor bb-constant memory-data))) (assign-vma-offset d) ;1 (store-word bb-s-word) ;1 (parallel ;1 (decr-d-offset) (lisp (trace-path #/,)) (jump ubitblt-aligned-row-source-backwards))) (defucode ubitblt-aligned-row-source-backwards-done (trap-no-save) (if (plus-fixnum bb-width) (sequential (decr-wrap-s-offset) (parallel-with-s-access bb-s-offset (assign byte-s (1- bb-width)) (assign byte-r bb-width) (assign bb-s-word (logxor bb-constant (ldb memory-data byte-s byte-r)))) (parallel-with-d-access bb-d-offset (assign byte-r (- (a-constant 32.) bb-width)) (parallel-with-return (store-word (dpb bb-s-word byte-s byte-r memory-data)) (lisp (trace-path #/2))))) (parallel-with-return (lisp (trace-path #/1))))) ;;each time through the loop, bb-s-word has the low part of the previous word ;;rotated to be at the high end of the word. We use it as background to LDB the ;;high part of the next word into it. (defucode ubitblt-d-aligned-row-source-backwards ;9 cycles per word (parallel ;1 cycle (assign bb-width (- bb-width (a-constant 32.))) ;assign is aborted if trap occurs (trap-if (minus-fixnum obus) ubitbit-d-aligned-row-source-backwards-done)) (decr-wrap-s-offset) ;1 (parallel-with-s-access ;3 bb-s-offset (assign byte-r (32- bb-s-bitpos)) (assign byte-s (31- bb-s-bitpos)) (assign bb-s-word2 (logxor bb-constant memory-data))) (assign-vma-offset d) ;1 (store-word (ldb bb-s-word2 byte-s byte-r bb-s-word)) ;1 (decr-d-offset) ;1 (parallel (assign bb-s-word (rotate bb-s-word2 byte-r)) (lisp (trace-path #/.)) (jump ubitblt-d-aligned-row-source-backwards))) (defucode ubitblt-d-aligned-row-source-backwards-done (trap-no-save) 4,887,235 495 496 (if (plus-fixnum bb-width) (if (greater-or-equal-fixnum bb-s-bitpos bb-width) (parallel-with-d-access bb-d-offset (assign byte-r (b-constant 0)) (assign byte-s (- (a-constant 31.) bb-width)) (parallel-with-return (store-word (ldb memory-data byte-s byte-r- bb-s-word)) (lisp (trace-path #/4)))) (sequential (decr-wrap-s-offset) (parallel-with-s-access bb-s-offset (assign byte-r bb-width) (assign bb-s-word (rotate bb-s-word byte-r)) (assign bb-s-word2 (logxor bb-constant memory-data))) (parallel (assign byte-r (- bb-width bb-s-bitpos)) (assign a-temp obus)) (assign byte-s (1- a-temp)) (assign bt-s-word (ldb bb-s-word2 byte-s byte-r bb-s-word)) (parallel-with-d-access bb-d-offset (assign byte-s (1- bb-width)) (assign byte-r (- (a-constant 32.) bb-width)) (parallel-with-return (store-word (dpb bb-s-word byte-s byte-r memory-data)) (lisp (trace-path #/5)))))) (parallel-with-return (lisp (trace-path #/3))))) (defucode ubitblt-long-row-both-backwards (parallel (assign b-temp bb-d-bitpos) (if (zero-fixnum bb-d-bitpos) (if (zero-fixnum bb-s-bitpos) (parallel (assign bb-s-offset (1+ bb-s-offset)) ;loop will decr first (lisp (trace-path #/a)) (jump ubitblt-aligned-row-both-backwards)) (parallel-with-s-access bb-s-offset (assign byte-r (32- bb-s-bitpos)) (parallel (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r))) (lisp (trace-path #/c)) (jump ubitblt-d-aligned-row-both-backwards)))) (if (equal-fixnum b-temp bb-s-bitpos) (sequential (parallel-with-s-access bb-s-offset (assign byte-s (1- bb-s-bitpos)) (assign byte-r (b-constant 0)) (assign bb-s-word (logxor bb-constant memory-data))) (assign-vma-offset d) (parallel (decr-d-offset) (start-memory read) (call bb-byte-alu-operation-dispatch)) (parallel (assign bb-width (- bb-width bb-s-bitpos)) (lisp (trace-path #/b)) (jump ubitblt-aligned-row-both-backwards))) (if (greater-fixnum bb-s-bitpos b-temp) ;s > d, enough in first word (sequential (parallel-with-s-access bb-s-offset (parallel (assign byte-r (- b-temp bb-s-bitpos)) (assign a-temp obus)) ;this is negative (assign byte-s (1- bb-d-bitpos)) (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r)))) (assign byte-r (b-constant 0)) (parallel (assign-vma-offset d) (call bb-byte-alu-operation-dispatch)) (assign bb-width (- bb-width bb-d-bitpos)) (assign b-temp bb-d-bitpos) (assign bb-s-bitpos (- bb-s-bitpos b-temp)) (parallel (decr-d-offset) (lisp (trace-path #/d)) (jump ubitblt-d-aligned-row-both-backwards))) (sequential (parallel-with-s-access ;s>>Wrapping around on bb-s-offset from ~d." (low32 (tr 'bb-s-offset)))) (assign bb-s-offset (b-constant 0))) (drop-through)))) (defmicro decr-wrap-s-offset () `(parallel (assign bb-s-offset (1- bb-s-offset)) (if (minus-fixnum obus) (parallel (lisp (format T "~&>>>Decr wrapping around on bb-s-offset")) (assign bb-s-offset (1- bb-s-row-length))) (drop-through)))) (defmicro incr-wrap-s-offset-ahead () `(sequential (parallel (assign bb-s-offset-ahead (1+ bb-s-offset)) (assign b-temp-3 obus)) 4,887,235 505 506 (if (greater-or-equal-fixnum b-temp-3 bb-s-row-length) (parallel (lisp (format T "~&>>>Wrapping around on bb-s-offset from ~d." (low32 (tr `bb-s-offset-ahead)))) (assign bb-s-offset-ahead (b-constant 0))) (drop-through)))) (defmicro decr-wrap-s-offset-ahead () `(parallel (assign bb-s-offset-ahead (1- bb-s-offset)) (if (minus-fixnum obus) (parallel (lisp (format t "~&>>>Decr wrapping around on bb-s-offset")) (assign bb-s-offset-ahead (1- bb-s-row-length))) (drop-through)))) (defmicro store-word (datum &rest options) `(store-contents (set-type ,datum dtp-fix) not-pointer . ,options)) ;;---the goddamn simulator compiles ;; (parallel (assign ...) (return)) ;;into ;; (prog ... (return nil) (setq ...)) (defmicro parallel-with-return (&body stm) `(,(if (eq *machine-version* 'sim) 'sequential 'parallel) ,@stm (return))) (defvar *fp-offset-names* ()) (defmacro def-fp-offsets (&rest names) (loop for i upfrom 0 for name in names append `((defatomicro ,name (amem (frame-pointer ,i))) (defprop ,name ,i fp-offset) (or (memq ',name *fp-offset-names*) (push ',name *fp-offset-names*))) into foo finally (return `(progn 'compile ,@foo)))) ;;decode fp offset numbers into symbols. Debugging only. (defun dfp (&rest numbers) (loop for number in numbers collect (loop for name in *fp-offset-names* when (equal (get name 'fp-offset) number) return name finally (return number)))) ;; Define arguments/state for BITBLT instructions. Note that these must be ;; relative to FP. not to the top of the stack, since there might be a ;; saved bitblt-buffer on the stack it the instruction was interrupted. (def-fp-offsets bb-arg-alu bb-arg-width bb-arg-height ;lisp arg bb-arg-from-array bb-arg-from-x bb-arg-from-y ;lisp arg bb-arg-to-array bb-arg-to-x bb-arg-to-y ;lisp arg bb-width ;ucode arg bb-s-data-addr ;ucode arg bb-s-row-offset ;ucode arg bb-s-offset ;ucode arg bb-s-bitpos ;ucode arg bb-s-row-length ;ucode arg bb-d-data-addr ;ucoda arg bb-d-offset ;ucode arg bb-d-bitpos ;ucode org bb-event-count ;ucode arg bb-alu-operation ;ucode arg ) ;;; Some temporaries. (define-b-temps bb-constant ;Value to store or to X0R in bb-s-word ;temp (source word) bb-s-row-addr ;start of current source row bb-d-row-addr ;start of current destination row bb-width-b ;copy of width on B side (sometimes) b-block-size) ;numoer of words in block (defareg bb-constant-a) ;A-side copy of bb-constant (defareg bb-identity) ;Background to dpb into when doing part word (defareg bb-s-word2) ;temp (other source word) (defareg bb-a-temp) (defareg bb-s-offset-ahead) ;s-offset not finalized yet (if pclsr) (defareg a-block-size) ;number- of words in block ;;; Bitblt-buffer hair (eval-when (compile load eval) (defconst n-bitblt-buffers 8)) #.`(progn 'compile ;B-memcry buffer for blccK-mode operations . ,(loop for i from 0 below n-bitblt-buffers collect `(defbreg ,(fintern "BITBLT-BUFFER-~D" i)))) 4,887,235 507 508 (defmicro bitblt-buffer (i) (fintern "BITBLT-BUFFER-~D" i)) ;We first compute the result n words at a time into the bitblt-buffer, ;and then store it into the destination (in one case the whole buffer ;is rotated by 1 to 31 bits as it is being stored). ;The bitblt-buffer is "active" while we are storming it into the destination. ;The bitblt buffer must be active while wo are modifying the destination, ;since the words copied into the buffer might overlapped with parts of ;the destination we have already modified. ; ;A pclsr while the bitblt-buffer is active will copy it into ;the stack, set first-part-done. and clear bitblt-buffer-active. ;A restart with first-part-done set will proceed normally until it comes time ;to store the bitblt-buffer. At that time, first-part-done is seen, the ;bitblt-buffer is restored from the stack (replacing the possibly-erroneous ;contents that were just computed), and execution then proceeds normally. ; ;The contents of the bitblt-buffer are assumed to have valid data type tags. ;For now, they could be forced to fixnum, but in the future we may have ;other instructions using this buffer and its save/restore mechanism. ;--- Still need to fix microcompiler to default cdr source from Bbus correctly --- ;Call here if we pclsr with the bitblt-buffer active (defucode save-bitblt-buffer #.`(sequential ,(loop for i from 0 below n-bitblt-buffers collect `(pushval-with-cdr (bitblt-buffer ,i)))) (assign first-part-done (b-constant 1)) (parallel (assign bitblt-buffer-active (b-constant 0)) (return))) ;Call here when about to start storing the bitblt-buffer ;This is actually a micro so that the first instruction of the routine ;gets open-coded into the caller ;This is hairily bummed to make the normal case go in only one cycle ;(if the trap is not taken then the obus has -1 on it) (defmicro activate-bitblt-buffer () `(parallel (assign bitblt-buffer-active obus) (trap-if (bit-test frame-misc-data (b-constant (byte-mask first-part-done))) activate-saved-bitblt-buffer))) ;We also need this closed-subroutine version (defucode activate-bitblt-buffer (parallel (activate-bitblt-buffer) (return))) (defucode activate-saved-bitblt-buffer (parallel (trap-save) ;Retry the assign,trap-if upon return #.`(sequential ,@(loop for i from (1- n-bitblt-buffers) downto 0 collect `(parallel (assign (bitblt-buffer ,i) top-of-stack-a) (decrement-stack-pointer))))) (parallel (assign first-part-done (b-constant 0)) (return))) ;Call here when done storing the bitblt-buffer (defucode deactivate-bitblt-buffer (parallel (assign bitblt-buffer-active (b-constant 0)) (assign top-of-stack top-of-stack-a) ;Could have been bashed by activate.,. (return))) (defmicro read-bb-s-word () `(parallel (assign a-temp (+ bb-width-b bb-s-bitpos)) (call read-bb-s-word1))) ;a-temp has the number of s bits needed relative to bit 0 of the first word (defucode read-bb-s-word1 (assign-vma-offset s) (parallel (assign byte-r (32- bb-s-bitpos)) (start-memory read)) (parallel (waiting-for-memory) (if (lesser-or-equal-fixnum a-temp (b-constant 32.)) ;;source is entirely within one word (parallel-with-return (abus-array-data (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r))))) ;; source is split across two words (sequential (abus-array-data (assign bb-s-word (rotate memory-data byte-r))) 4,887,235 509 510 (incr-wrap-s-offset-ahead) (assign-vma-offset s-ahead) (parallel (start-memory read) ;byte-r is already ok ) (parallel (waiting-for-memory) (assign byte-s (1- a-temp))) (abus-array-data (assign bb-s-word (dpb memory-data byte-s byte-r bb-s-word))) (parallel-with-return (assign bb-s-word (logxor bb-s-word bb-constant-a))))))) ;;Assumptions about setup: ;;bb-constant has: ;; >> for constant operations (0,-1): the constant; ;; >> for operations dependent only on source or destination (x, ~x, y, ~y); ;; a 0 for x,y or -1 for ~x,~y; ;; >> for operations dependent on both s and d; 0 for those using source directly, :; and -1 for those that want the source complemented. (defucode bb-copy-stuff-to-b-side (assign bb-s-row-addr (+ bb-e-data-addr b-temp)) (parallel-with-return (assign bb-d-row-addr bb-d-data-addr))) (defmacro definst-bitblt (name source destination neither both) `(definst ,name no-operand (parallel (assign b-temp bb-s-row-offset) (call bb-copy-stuff-to-b-side)) (dispatch-after-this (parallel (ldb bb-alu-operation 4 0) ;; Set up constant needed for- the most common case (assign bb-constant (via-xbus (b-constant 0))) (assign bb-constant-a (via-xbus (b-constant 0)))) (assign bb-width-b bb-width) ((0) ;0 (goto ,neither)) ((1) ;x*y (parallel (assign bb-identity (a-constant -1)) (jump ,both))) ((2) ;~x*y (assign bb-identity (a-constant -1)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((3) (return)) ;y ((4) (parallel (assign bb-identity (a-constant -1)) (jump ,both))) ((5) (goto ,source)) ;x ((6) ;x xor y (parallel (assign bb-identity (a-constant 0)) (jump ,both))) ((7) ;x+y (parallel (assign bb-identity (a-constant 0)) (jump ,both))) ((8.) ;~x*~y (assign bb-identity (a-constant -1)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump,both))) ((9.) ;~x xor y (assign bb-identity (a-constant -1)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((10.) ;~x (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,source))) ((11.) ;~x+y (assign bb-identity (a-constant 0)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((12.) ;~y (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump .destination))) ((13.) ;x+~y actually, ~(~x*y) (assign bb-identity (a-constant -1)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((14.) ;~x+~y actually, ~(x*y) (parallel (assign bb-identity (a-constant -1)) (jump ,both))) ((15.) ;-1 (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,neither)))))) (definst-bitblt %bitblt-short-row ubitblt-short-row-source ubitblt-short-row-destination ubitblt-short-row-neither ubitblt-short-row-both) 4,887,235 511 512 (definst-bitblt %bitblt-long-row ubitblt-long-row-source ubitblt-long-row-destination ubitblt-long-row-neither ubitblt-long-row-both) (definst-bitblt %bitblt-long-row-backwards ubitblt-long-row-source-backwards ubitblt-long-row-destination ubitblt-long-row-neither ;direction immaterial ubitblt-long-row-backwards) (defucode ubitblt-short-row-source (read-bb-s-word) (assign a-temp (+ bb-width-b bb-d-bitpos)) (parallel (assign byte-s (- a-temp (b-constant 32.) 1)) (if (lesser-or-equal-fixnum-unsigned a-temp (b-constant 32.)) ;; destination is entirely within one word (parallel-with-d-access bb-d-offset (assign byte-s (1- bb-width)) (assign byte-r bb-d-bitpos) (parallel-with-return (store-word (dpb bb-s-word byte-s byte-r memory-data)))) ;; destination is cplit across two words ;; must access-check them both before modifying either (sequential ;; compute the high byte (parallel-with-d-access-check-write (1+ bb-d-offset) (assign byte-r bb-d-bitpos) (assign a-temp (ldb bb-s-word byte-s byte-r memory-data))) ;; compute and store the low byte (parallel-with-d-access bb-d-offset (assign byte-s (31- bb-d-bitpos)) (store-word (dpb bb-s-word byte-s byte-r memory-data) block)) ;; now store the hign byte. This cannot fault (parallel-with-return (store-word a-temp block)))))) (defucode ubitblt-short-row-destination (assign a-temp (+ bb-width-b bb-d-bitpos)) (parallel (assign byte-s (- a-temp (b-constant 32.) 1)) (if (lesser-or-equal-fixnum-unsigned a-temp (b-constant 32.)) ;; destination is entirely within one word (parallel-with-d-access bb-d-offset (assign byte-s (1- bb-width)) (assign byte-r bb-d-bitpos) (parallel-with-return (store-word (logxor (dpb bb-constant byte-s byte-r 0) memory-data)))) ;; destination is split across two words ;; must access-check them both before modifying either (sequential ;; compute the high byte (parallel-with-d-access-check-write (1+ bb-d-offset) (assign byte-r (a-constant 0)) (assign a-temp (logxor (ldb bb-constant byte-s byte-r) memory-data))) ;; compute and store the low byte (parallel-with-d-access bb-d-offset (assign byte-s (31- bb-d-bitpos)) (assign byte-r bb-d-bitpos) (store-word (logxor (dpb bb-constant byte-s byte-r 0) memory-data) block)) ;; now store the high byte. This cannot fault (parallel-with-return (store-word a-temp block)))))) ;; The alu operation is actually a constant (defucode ubitblt-short-row-neither (assign a-temp (+ bb-width-b bb-d-bitpos)) (if (lesser-or-equal-fixnum a-temp (b-constant 32.)) ;; destination is entirely within one word (parallel-with-d-access bb-d-offset (assign byte-s (1- bb-width)) (assign byte-r bb-d-bitpos) (parallel-with-return (store-word (dpb bb-constant byte-s byte-r memory-data)))) ;; destination is split across two words, but no pclcr problems since doing ;; the operation twice produces the same effect (sequential ;; store the low byte (parallel-with-d-access bb-d-offset (assign byte-s (31- bb-d-bitpos)) (assign byte-r bb-d-bitpos) (store-word (dpb bb-constant byte-s byte-r memory-data))) ;; store the high byte (parallel-with-d-access (1+ bb-d-offset) (assign byte-s (1- a-temp)) (assign byte-r (a-constant 0)) (parallel-with-return (store-word (dpb bb-constant byte-s byte-r memory-data)))))))