4,887,235 293 294 (definst numberp no-operand (if (data-type? top-of-stack-a dtp-fix dtp-float dtp-extended-number) (goto true1) (goto false1))) (definst symbolp no-operand (if (data-type? top-of-stack-a dtp-symbol dtp-nil) (goto true1) (goto false1))) (definst arrayp no-operand (if (data-type? top-of-stack-a dtp-array) (goto true1) (goto false1))) F:>LMach>Ucode>NET.LISP.71 ;;; -*- Mode:Lisp; Package:Micro; Base:8; Lowercase:yes -*- ;;; (c) Copyright 1982, Symbolics, Inc. (reserve-scratchpad-memory 2520 2531 314 324) (associate-dispatch-cues net-micro-status *net-micro-status-codes*) (define-enumerated-value-constants *net-micro-status-codes*) (defatomic-byte-field net-micro-status (4 0) %net-micro-status) (defareg %net-block-pointer) ;Pointer to next block (defareg %net-memory-address) ;Address in this block (defareg %net-word-count) ;Word count of this block ;; Packet we are receiving into or -1 (defareg %net-packet-being-received (set-type -1 dtp-fix)) ;; Packet we are transmitting or -1 (defareg %net-packet-being-transmitted (set-type -1 dtp-fix)) (defareg %net-control-address) ;Address of the control register (defareg net-dma-temp) ;;; A network unit is 512 bit times, but the board times 128 bit times, so that ;;; we must multiply by 4 (defareg %net-backoff-count) ;l2us units to back off (defareg %net-next-backoff) ;Mask of units to back off ;betwecn 2^n-1 where n is ;the nth retransmission + 2 (defbreg %net-address-1) ;Our net address (defbreg %net-address-2) (defbreg net-b-temp) (defmicro set-net-status (net-status-code) 'assign %net-micro-status (set-type ,net-status-code dtp-fix))) ;Wakeup the net service task ;This is called in the DMA task usually, but can also be called by the emulator (defmicro wakeup-net-service () '(parallel (assign service-task-requests (logior service-task-requests (b-constant (byte-mask %%service-net)))) (wakeup-task %device-service-task) )) (defmicro terminate-net-dma (net-status-code &optional (end-p t)) '(sequential (set-net-status ,net-status-code) (net-control nil ,end-p) (parallel (wakeup-net-service) (jump net-dma-dead)))) (defmicro start-net-dma (location) '(write-task-state %net-dma-task (a-constant '(build-task-state cpc ,location npc (npc-successor ,location) csp 17)))) (defmicro io-board-bug-delay () '(parallel (disable-tasking) (declare-memory-timing (next active-cycle)))) (eval-when (compile load eval) (defun net-buffer-address (dma-p dismiss-p end-p) (logior (if dma-p 1 0) (if dismiss-p 2 0) (if end-p 4 0) 10)) );eval-when compile load eval 4,887,235 295 296 (defmicro read-net-buffer (&optional (dismiss-p nil) (end-p nil)) (let ((dev-addr (net-buffer-address nil dismiss-p end-p))) '(parallel (extra-time-to-drive-lbus) (read-lbus-dev iob ,dev-addr) ,(if dismiss-p '(dismiss))))) (defmicro service-read-net-buffer (&optional (dismiss-p nil) (end-p nil)) (let ((dev-addr (net-buffer-address nil dismiss-p end-p))) '(parallel (extra-time-to-drive-lbus) (read-lbus-dev iob ,dev-addr)))) (defmicro service-net-control (&optional (dismiss-p nil) (end-p nil)) (let ((dev-addr (net-buffer-address nil dismiss-p end-p))) '(parallel (write-lbus-dev iob ,dev-addr nil)))) (defmicro transmit-dma (addr &optional (dismiss-p t) (end-p nil)) (let ((dev-addr (net-buffer-address t dismiss-p end-p))) '(parallel (start-memory read physical addr dma lob ,dev-addr) ,(if dismiss-p '(dismiss))))) (defmicro receive-dma (addr &optional (dismiss-p t) (end-p nil)) (let ((dev-addr (net-buffer-address t dismiss-p end-p))) '(parallel (start-memory write physical ,addr dma iob ,dev-addr) (assign ,addr (1+ ,addr)) ,(if dismiss-p '(dismiss))))) (defmicro net-control (&optional (input-p nil) (dismiss-p t) (end-p nil)) (let ((dev-addr (net-buffer-address nil dismiss-p end-p))) '(parallel ,(if input-p '(for-effect (read-lbus-dev lob ,dev-addr)) '(write-lbus-dev iob dev-addr nil)) ,(if dismiss-p '(dismiss))))) (defmicro increment (location &optional (fixnum-p t)) (if fixnum-p '(assign ,location (set-type (1+ ,location) dtp-fix)) '(assign ,location (1+ ,location)))) F:>lmach>ucode>nBITBLT.LISP.22 ;;; -*- Mode:Lisp; Package:Micro; Base:8; Lowercase:yes -*- ;;; (c) Copyright 1982, Symbolics, Inc. ;;;; BITBLT microcode for 3600 ;; The pclsring theory: ;; ;; Reads can be repeated with no harmful effects, writes cannot be (in most cases). ;; State is not permanently updated until a write is consummated. ;; After every write, state should be updated so that if the next memory operation ;; faults and pclsrs, that write will not be repeated (the bitblt row will be shorter). ;; To avoid the overhead of doing this for every write, we have block mode ;; operations that only update the state after writing a block of words. ;; ;; For the block mode things, we use a buffer that can be saved. See next+1 page. ;; ;; For the short-row things, when the destination is split across two words, ;; we check write access to both words before modifyin9 either of them. ;: No pclsring problems if the operation depends on neither operand. ;; :; When there is a partial word at the front, do it and then advance the arguments ;; so the bitblt is word aligned in the destination, When there is a partial word ;; at the end, when we get there the arguments have been advanced. (reserve-scratchpad-memory 2460 2470 320 330) (defmicro waiting-for-memory () ;documentation Only '(nop)) (defmicro abus-array-data (&body body) '(parallel (transport data) (check-data-type memory-data dtp-fix) ,@body)) 4,887,235 297 298 (defmicro assign-vma-offset (which &rest stuff) (selectq which (S '(assign vma (+ bb-s-row-addr bb-s-offset ,@stuff))) (0 '(assign vma (+ bb-d-row-addr bb-d-offset ,@stuff))) (S-ahead '(assign vma (+ bb-s-row-addr bb-s-offset-ahead ,@stuff))) (otherwise (ferror "assign-vma-offset knows about only S and 0, not ~S which")))) (defmicro parallel-with-s-access (offset &body body) (make-memory-access 'bb-s-row-addr 'bb-s-offset offset body '(read))) (defmicro parallel-with-d-access (offset &body body) (make-memory-access 'bb-d-row-addr 'bb-d-offset offset body '(read))) (defmicro parallel-with-d-access-check-write (offset &body body) (make-memory-access 'bb-d-row-addr 'bb-d-offset offset body (read write))) (eval-when (eval compile load) (defun make-memory-access (baseaddr offset-sym offset body memory-modes) (or (eq offset offset-sym) (equal offset '(1+ ,offset-sym)) (and (eq offset-sym 'bb-s-offset) (eq offset 'bb-s-offset-ahead)) (ferror "~S is not a recognized offset for ~S" offset offset-sym)) (let* ((body (reverse body)) (finally '(abus-array-data ,(car body)))) (do ((ll (reverse '((assign vma ,(if (atom offset) '(+ ,baseaddr ,offset) '(+ ,baseaddr ,(second offset) 1))) (start-memory ,@memory-modes) (waiting-for-memory))) (cdr ll)) (body (cdr body) (cdr body)) (l)) ((and (null ll) (null body)) '(sequential ,@l ,finally)) (cond ((null ll) (push (car body) l)) ((null body) (push (car ll) l)) (T (push '(parallel ,(car ll) ,(car body)) l)))))) );eval-when (defmicro 31- (operand) '(- (b-constant 31.) ,operand)) (defmicro incr-d-offset () '(assign bb-d-offset (1+ bb-d-offset))) (defmicro decr-d-offset () '(assign bb-d-offset (1- bb-d-offset))) (defmicro incr-wrap-s-offset () '(sequential (parallel (assign bb-s-offset (1+ bb-s-offset)) (assign b-temp-3 obus)) (if (greater-or-equal-fixnum b-temp-3 bb-s-row-length) (parallel (lisp (format T "~&>>>Wrapping around on bb-s-offset from ~d." (low32 (tr 'bb-s-offset)))) (assign bb-s-offset (b-constant 0))) (drop-through)))) (defmicro decr-wrap-s-offset () '(parallel (assign bb-s-offset (1- bb-s-offset)) (if (minus-fixnum obus) (parallel (lisp (format t "~&>>>Decr wrapping around on bb-s-offset")) (assign bb-s-offset (1- bb-s-row-length))) (drop-through)))) (defmicro incr-wrap-s-offset-ahead () (sequential (parallel (assign bb-s-offset-ahead (1+ bb-s-offset)) (assign b-temp-3 obus)) (if (greater-or-equal-fixnum b-temp-3 bb-s-row-length) (parallel (lisp (format T "~&>>>Wrapping around on bb-s-offset from ~d" (low32 (tr bb-s-offset-ahead)))) (assign bb-s-offset-ahead (b-constant 0))) (drop-through)))) (defmicro decr-wrap-s-offset-ahead () '(parallel (assign bb-s-offset-ahead (1- bb-s-offset)) (if (minus-fixnum obus) (parallel (lisp (format t "~&~>>>Decr wrapping around on bb-s-offset")) (assign bb-s-offset-ahead (1- bb-s-row-length))) (drop-through)))) 4,887,235 299 300 (defmicro store-word (datum &rest options) '(store-contents (set-type ,datum dtp-fix) not-pointer . ,options)) ;;---the goddamn simulator compiles ;; (parallel (assign ...) (return)) ;; into ;; (prog ... (return nil) (setq ...)) (defmicro parallel-with-return (&body stm) '(,(if (eq *machine-version* 'sim) 'sequential 'parallel) ,@stm (return))) (defmicro via-xbus (source) (make-microdata 'xbus (get-to-xbus source))) (defvar *fp-offset-names* ()) (defmacro def-fp-offsets (&rest names) (loop for i upfrom 0 for name in names append '((defatomicro ,name (amem (frame-pointer ,i))) (defprop ,name ,i fp-offset) (or (memq ',name *fp-offset-names*) (push ',name *fp-offset-names*))) into foo finally (return `(progn 'compile ,@foo)))) ;; decode fp offset numbers into symbols. Debugging only. (defun dfp (&rest numbers) (loop for number in numbers collect (loop for name in *fp-offset-names* when (equal (get name 'fp-offset) number) return name finally (return number)))) ;; Define arguments/state for BITBLT instructions. Note that these must be ;: relative to FP, not to the top of the stack, since there might be a ;; saved bitblt-buffer on the stack if the instruction was interrupted. (def-fp-offsets bb-arg-alu bb-arg-width bb-arg-height ;lisp arg bb-arg-from-array bb-arg-from-x bb-arg-from-y ; lisp arg bb-arq-to-array bb-arg-to-x bb-arg-to-y ;lisp arg bb-width ;ucode arg bb-s-data-addr ;ucode arg bb-s-row-offset ;ucooe arg bb-s-offset ;ucode arg bb-s-bitpos ;ucode arg bb-s-row-length ;ucode arg bb-d-data-addr ;ucode arg bb-d-offset ;ucode arg bb-d-bitpos ;ucode arg bb-event-count ;ucode arg bb-alu-operation ;ucode arg ) ;;; Some temporaries. (define-b-temps bb-constant ;Value to store or to XOR in bb-s-word ;temp (source word) bb-s-row-addr ;start of current source row bb-d-row-addr ;start of current destination row bb-width-b ;copy of width on B side (sometimes) b-block-size) ;number of words in block (defareg bb-constant-a) ;A-side copy of bb-constant (defareg bb-identity) ;Background to dpb into when doing part word (defareg bb-s-word2) ;temp (other source word) (defareg bb-a-temp) (defareg bb-s-offset-ahead) ;a-offset not finalized yet (if pclsr) (defareg a-block-size) ;number of words in block ;;; Bitblt-buffer hair (eval-when (compile load eval) (defconst n-bitblt-buffers 8)) ;XXXbrad backquote? #.'(progn 'compile ;B-memory buffer for block-mode operations . ,(loop for i from 0 below n-bitblt-buffers collect '(deltreg ,(fintern "BITBLT-BUFFER-~D" i)))) (defmicro bitblt-buffer (i) (fintern "BITBLT-BUFFER-~D" i)) ;--- this defareg goes in some other file --- ;If this register is non-zero and we pclsr, save-bitblt-buffer must be ;called after restoring the stack pointer. (defareg bitblt-buffer-active 0) 4,887,235 301 302 ;We first compute the result n words at a time into the bitblt-buffer, ;and then store it into the destination (in one case the whole buffer ;is rotated by 1 to 31 bits as it is being stored). ;The bitblt-buffer is "active" while we are storing it into the destination. ;The bitbit buffer must be active while we are modifying the destination, ;since the words copied into the buffer might overlapped with parts of ;the destination we have already modified. ; ;A pclsr while the bitblt-buffer is active will copy it into ;the stack, set first-part-done, and clear bitblt-buffer-active. ;A restart with first-part-done set will proceed normally until it comes time ;to store the bitblt-buffer. At that time, first-part-done is seen, the ;bitblt-buffer is restored from the stack (replacing the possibly-erroneous ;contents that were just computed), and execution then proceeds normally. ; ;The contents of the bitblt-buffer are assumed to have valid data type tags. ;For now, they could be forced to fixnum, but in the future we may have ;other instructions using this buffer and its save/restore mechanism. ;--- Still need to fix microcompiler to default cdr source from Bbus correctly --- ;Call here if we pclsr with the bitblt-buffer active (defucode save-bitblt-buffer #.'(sequential . ,(loop for i from 0 below n-bitblt-buffers collect '(pushval-with-cdr (bitblt-buffer ,i)))) (assign first-part-done (b-constant 1)) (parallel (assign bitblt-buffer-active (b-constant 0)) (return))) ;Call here when about to start storing the bitblt-buffer ;This is actually a micro so that the first instruction of the routine ;gets open-coded into the caller ;This is hairiiy bummed to make the normal case go in only one cycle ;(if the trap is not taken then the obus has -1 on it) (defmicro activate-bitblt-buffer () '(parallel (assign bitblt-buffer-active obus) (trap-if (bit-test frame-misc-data (b-constant (byte-mask first-part-done))) activate-saved-bitblt-buffer))) ;We also need this closed-subroutine version (defucode activate-bitblt-buffer (parallel (activate-bitblt-buffer) (return))) (defucode activate-saved-bitblt-buffer (parallel (trap-save) #.'(sequential ;Retry the assign,trap-if upon return ,@(loop for i from (1- n-bitblt-buffers) downto 0 collect '(parallel (assign (bitblt-buffer ,i) top-of-stack-a) (decrement-stack-pointer))))) (parallel (assign first-part-done (b-constant 0)) (return))) ;Call here when done storing the bitblt-buffer (defucode deactivate-bitblt-buffer (parallel (assign bitblt-buffer-active (b-constant 0)) (assign top-of-stack top-of-stack-a) ;Could hove been bashed by activate... (return))) (defmicro read-bb-s-word () '(parallel (assign a-temp (+ bb-width-b bb-s-bitpos)) (call read-bb-s-word1))) ;a-temp has the number of in bits needed relative to bit 0 of the first word (defucode read-bb-s-word1 ;XXXbrad s? (assign-vma-offset s) (parallel (assign byte-r (32- bb-s-bitpos)) (start-memory read)) (parallel (waiting-for-memory) (if (lesser-or-equal-fixnum a-temp (b-constant 32.)) ;;source is entrely within one word (parallel-with-return (abus-array-data (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r))))) ;;source is split across two words (sequential (abus-array-data (assign bb-s-word (rotate memory-data byte-r))) (incr-wrap-s-offset-ahead) (assign-vma-offset s-ahead) 4,887,235 303 304 (parallel (start-memory read) ;byte-r is already Ok ) (parallel (waiting-for-memory) (assign byte-s (1- a-temp))) (abus-array-data (assign bb-s-word (dpb memory-data byte-s byte-r bb-s-word))) (parallel-with-return (assign bb-s-word (logxor bb-s-word bb-constant-a))))))) ;;Assumptions about setup: ;;bb-constant has: ;; >> for constant cperations (0,-1): the constant; ;; >> for operations dependent only on source or destination (x, ~x, y, ~y) ;; a 0 for x,y or -1 for ~x,~y; ;; >> for operations dependent on both a and d: 0 for those using source directly, ;; and -1 for those that want the source complementod. (defucode bb-copy-stuff-to-b-side (assign bb-s-row-addr (+ bb-s-data-addr b-temp)) (parallel-with-return (assign bb-d-row-addr bb-d-data-addr))) (defmacro definst-bitblt (name source destination neither both) '(definst ,name no-operand (parallel (assign b-temp bb-s-row-offset) (call bb-copy-stuff-to-b-side)) (dispatch-after-this (parallel (ldb bb-alu-operation 4 0) ;; Set up constant needed for the most common case (assign bb-constant (via-xbus (b-constant 0))) (assign bb-constant-a (via-xbus (b-constant 0)))) (assign bb-width-b bb-width) ((0) ;0 (goto ,neither)) ((1) ;x*y (parallel (assign bb-identity (a-constant -1)) (jump ,both))) ((2) ;~x*y (assign bb-identity (a-constant -1)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((3) (return)) ;y ((4) ;x*~y (parallel (assign bb-identity (a-constant -1)) (jump ,both))) ((5) (goto ,source)) ((6) ;x xor y (parallel (assign bb-identity (a-constant 0)) (jump ,both))) ((7) ;x+y (parallel (assign bb-identity (a-constant 0)) (jump ,both))) ((8.) ;~x*~y (assign bb-identity (a-constant -1)) (parallel (assign bb-constant (a-constant -1)) (assign bt-constant-a (a-constant -1)) (jump ,both))) ((9.) ;~x xor y (assign bb-identity (a-constant 0)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((10.) ;~x (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,source))) ((11.) ;~x+y (assign bb-identity (a-constant 0)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((12.) ;~y (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,destination))) ((13.) ;x+~y actually ~(~x*y) (assign bb-identity (a-constant -1)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((14.) ;~x+~y actually ~(x*y) (parallel (assign bb-identity (a-constant -1)) (jump ,both))) ((15.) ;-1 (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,neither)))))) (definst-bitblt %bitblt-short-row ubitblt-short-row-source ubitblt-short-row-destination ubitblt-short-row-neither ubitblt-short-row-both) 4,887,235 305 306 (definst-bitblt %bitblt-long-row ubitblt-long-row-source ubitblt-long-row-destination ubitblt-long-row-neither ubitblt-long-row-both) (definst-bitblt %bitblt-long-row-backwards ubitblt-long-row-source-backwards ubitblt-long-row-destination ;direction immaterial ubitblt-long-row-neither ubitblt-long-row-both-backwards) (defucode ubitblt-short-row-source (read-bb-s-word) (assign a-temp (+ bb-width-b bb-d-bitpos)) (parallel (assign byte-s (- a-temp (b-constant 32.) 1)) (if (lesser-or-equal-fixnum-unsigned a-temp (b-constant 32.)) ;; destination is entirely within one word (parallel-with-d-access bb-d-offset (assign byte-s (1- bb-width)) (assign byte-r bb-d-bitpos) (parallel-with-return (store-word (dpb bb-s-word byte-s byte-r memory-data)))) :; destination is split across two words ;; must access-check them both before modifying either (sequential ;; compute the high byte (parallel-with-d-access-check-write (1+ bb-d-offset) (assign byte-r bb-d-bitpos) (assign a-temp (ldb bb-s-word byte-s byte-r memory-data))) ;; compute and store the low byte (parallel-with-d-access bb-d-offset (assign byte-s (31- bb-d-bitpos)) (store-word (dpb bb-s-word byte-s byte-r memory-data) block)) ;; now store the high byte. This cannot fault (parallel-with-return (store-word a-temp block)))))) (defucode ubitblt-short-row-destination (assign a-temp (+ bb-width-b bb-d-bitpos)) (parallel (assign byte-s (- a-temp (b-constant 32.) 1)) (if (lesser-or-equal-fixnum-unsigned a-temp (b-constant 32.)) ;; destination is entirely within one word (parallel-with-d-access bb-d-offset (assign byte-s (1- bb-width)) (assign byte-r bb-d-bitpos) (parallel-with-return (store-word (logxor (dpb bb-constant byte-s byte-r 0) memory-data)))) ;; destination is split across two words ;; must access-check them both before modifying either (sequential ;; compute the hich byte (parallel-with-d-access-check-write (1+ bb-d-offset) (assign type-r (a-constant 0)) (assign a-temp (logxor (ldb bb-constant byte-s byte-r) memory-data))) ;; compute and store the low byte (parallel-with-d-access bb-d-offset (assign byte-s (31- bb-d-bitpos)) (assign byte-r bb-d-bitpos) (store-word (logxor (dpb bb-constant byte-s byte-r 0) memory-data) block)) ;; now store the high byte. This cannot fault (parallel-with-return (store-word a-temp block)))))) ;; The alu operation is actually a constant (defucode ubitblt-short-row-neither (assign a-temp (+ bb-width-b bb-d-bitpos)) (if (lesser-or-equal-fixnum a-temp (b-constant 32.)) ;; destination is entirely within one word (parallel-with-d-access bb-d-offset (assign byte-s (1- bb-width)) (assign byte-r bt-d-bitpos) (parallel-with-return (store-word (dpb bb-constant byte-s byte-r memory-data)))) ;; destination is split across two words, but no pclsr problems since doing ;; the operation twice produces the same effect (sequential ;; store the low byte (parallel-with-d-access bb-d-offset (assign byte-s (31- bb-d-bitpos)) (assign byte-r bb-d-bitpos) (store-word (dpb bb-constant byte-s byte-r memory-data))) ;; store the high byte (parallel-with-d-access (1+ bb-d-offset) (assign byte-s (1- a-temp)) (assign byte-r (a-constant 0)) (parallel-with-return (store-word (dpb bb-constant byte-s byte-r memory-data))))))) 4,887,235 307 308 ;; The alu operation depends upon both source and destination bits (defucode ubitblt-short-row-both (read-bb-s-word) (assign a-temp (+ bb-width-b bb-d-bitpos)) (if (lesser-or-equal-fixnum a-temp (b-constant 32.)) ;; destination is entirely within one word (sequential (assign byte-s (1- bb-width)) (assign byte-r bb-d-bitpos) (parallel (assign-vma-offset d) (jump bb-byte-alu-operation-dispatch))) ;jcall ;; destination is split across two words (sequential ;; make sure we have write access to the high byte so no pclsr after storing low (assign-vma-offset d 1) (start-memory read write) ;; store the low byte (assign byte-s (31- bb-d-bitpos)) (assign byte-r bb-d-bitpos) (parallel (assign-vma-offset d) (call bb-byte-alu-operation-dispatch)) ;; store the high byte (assign bb-s-word (rotate bb-s-word byte-r)) (assign byte-s (1- a-temp)) (assign byte-r (b-constant 0)) (parallel (assign-vma-offset d 1) (jump bb-byte-alu-operation-dispatch))))) ;jcall ;(boole fn x y ...) if fn is "abcd" then ; y 0 1 2 3 4 5 6 7 ; | 0 1 0 x*y ~x*y y x*~y x x#y x+y ; ---------- ; 0 | a c 8 9 l0 11 12 13 14 15 ; x | ~(x+y) ~(x#y) ~x ~x+y ~y x+~y ~x+~y -1 ; 1 | b d ;;vma and byte regs have been set up already, for DPB. ;;trashes a-temp-2, b-temp-2, b-temp-3, but not a-temp and b-temp. (defucode bb-byte-alu-operation-dispatch (dispatch-after-this (parallel (start-memory read) (ldb bb-alu-operation 4 0)) (parallel (assign b-temp-3 (dpb bb-s-word byte-s byte-r bb-identity)) (waiting-for-memory)) ((1 2) ;;l x*y logand ;l2 ~x*y logand (parallel-with-return (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (logand memory-data b-temp-3)))))) ((4 8.) ;;4 ~(~x+y) = x*~y andc2 ;;8 ~(x+y) = ~xx*~y andcb (parallel (declare-memory-timing data-cycle) (abus-array-data (assign a-temp-2 memory-data))) (assign b-temp-2 (dpb (b-constant -1) byte-s byte-r 0)) ;can't merge this... (assign a-temp-2 (logxor a-temp-2 b-temp-2)) ;...with this. (parallel-with-return (store-word (logand a-temp-2 b-temp-3)))) ((6 9.) ;;6 x#y logxor ;;9 ~(x#y)=~x#y logxor (parallel-with-return (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (logxor b-temp-3 memory-data)))))) ((7 11.) ;;7 x+y logior ;;11 ~x+y logior (parallel-with-return (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (logior b-temp-3 memory-data)))))) ((13. 14.) ;;13 x+~y = ~(~x*y) lognand ;;14 ~x+~y=~(x*y) (parallel (declare-memory-timing data-cycle) (abus-array-data (assign a-temp-2 (logand b-temp-3 memory-data)))) (parallel-with-return (store-word (logxor (dpb (b-constant -1) byte-s byte-r 0) a-temp-2)))))) ;;vma has been set up already (defucode bt-word-alu-operation-dispatch ;commonly 3 cycles (plus 1 for the call) (dispatch-after-this (parallel (start-memory read) (ldb bt-alu-operation 4 0)) (waiting-for-memory) ;---want to use this somehow... ((1 2) ;;1 x*y logand ;;2 ~x:y logand (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (logand bb-s-word memory-data))) (return))) 4,887,235 309 310 ((4 8.) ;;4 x*~y andcb ;;8 ~(x+y) ~x*~y andcb (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (anac2 bb-s-word memory-data))) (return))) ((6 9.) ;;6 x#y logxor ;;9 ~(x#y)=~x#ylogxor (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (logxor bb-s-word memory-data))) (return))) ((7 11.) ;;7 x+y logior ;;1l ~x+y logior (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (logior bb-s-word memory-data))) (return))) ((13. 14.) ;;13 x+~y = ~(~x*y) ;;14 ~x+~y=~(x*y) (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (lognand bb-s-word memory-data))) (return))))) ;;alu depends only on source bits (defucode ubitblt-long-row-source (parallel (assign b-temp bb-d-bitpos) (if (zero-fixnum bb-d-bitpos) (if (zero-fixnum bb-s-bitpos) (goto ubitblt-aligned-row-source) ;; SSSSSSSSSSSSSSSSSSSSSSSSSsssssss ;; dddddddddddddddddddddddddddddddd (parallel-with-s-access bb-s-offset (assign byte-r (32- bb-s-bitpos)) (parallel (assign bb-s-word2 (logxor bb-constant (rotate memory-data byte-r))) (lisp (trace-path #/c)) (jump ubitblt-d-aligned-row-source)))) (if (equal-fixnum b-temp bb-s-bitpos) ;;SSSSSSSSSSSSSSSSSSSSSSSSSsssssss ;;DDDDDDDDDDDDDDDDDDDDDDDDDddddddd (sequential (parallel-with-s-access bb-s-offset (assign b-temp (32- bt-d-bitpos)) (assign byte-r b-temp) (assign bb-s-word (logxor bb-constant (rotate memory-data tyte-r)))) (parallel-with-d-access bb-d-offset (assign byte-r bb-d-bitpos) (assign byte-s (1- t-temp)) (store-word (dpb bb-s-word byte-s byte-r memory-data))) ;;First partial word done, we are now the aligned case (incr-wrap-s-offset) (incr-d-offset) (assign bb-width (- bb-width b-temp)) (assign bb-s-bitpos (b-constant 0)) (parallel (assign bb-d-bitpos (b-constant 0)) (lisp (trace-path #/b)) (jump ubitbit-aligned-row-source))) (if (lesser-fixnum bb-s-bitpos b-temp) ;;sssssssssSSSSSSSSSSSSSSS........ ;; DDDDDDDDDDDDDDDdddddddddddddddd ;; <- 32-d.bitpos -> (sequential (parallel-with-s-access bb-s-offset (assign byte-r (32- bb-s-bitpos)) (assign b-temp (32- bb-d-bitpos)) (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r)))) ;;.......sssssssssSSSSSSSSSSSSSSSS (parallel-with-d-access bb-d-offset (assign byte-r bb-d-bitpos) (assign byte-s (1- b-temp)) (store-word (dpb bb-s-word byte-s byte-r memory-data))) ;; First partial B word done, some S bits from first word remain (incr-d-offset) ;;rotate s-word further to right by 32-d.bitpos = left by -(32-d.bitpos) ;;SSSSSSSSSSSSSSSS.......sssssssss (assign bb-s-word2 (rotate bb-s-word byte-r)) (assign bb-s-bitpos (+ bb-s-bitpos b-temp)) (assign bb-width (- bb-width b-temp)) (parallel (assign bb-d-bitpos (b-constant 0))) (lisp (trace-path #/d)) (jump ubitblt-d-aligned-row-source)) (sequential ;;The high part of the first source word is not as long as the high part of the ;;first destination word. So extract the useful part of the firs t source word. ;:and deposit into it as much of the cecond source word as needed to fill Out the rest ;;of the first destination word. Then position the rest of the second source word ;;appropriately for the inner loop. 4,887,235 311 312 ;; <- 32-s -> ;; ................................|SSSSSSSSSSssssssssssssssssssssss ;; DDDDDDDDDDDDDDDD DDDDDDDDDDdddddd (parallel-with-s-access bb-s-offset (assign byte-r (32- bb-s-bitpos)) (assign b-temp-2 bb-s-bitpos) (assign bb-s-word (logxor bb-constant (rotate memory-data tyte-r)))) (incr-wrap-s-offset-ahead) ;; <----- s-d ----> <- 32-s -> (32-d)-(32-s)-s-d ;; ssssssssssssssssSSSSSSSSSSSSSSSS|1111111111...................... ;; DDDDDDDDDDDDDDDD DDDDDDDDDDdddddd (parallel-with-s-access bb-s-offset-ahead (assign byte-r (32- bb-s-bitpos)) (assign byte-s (- b-temp-2 bt-d-bitpos 1)) (assign bb-s-word2 (logxor bb-constant memory-data))) (assign bb-s-word (dpb bb-s-word2 byte-s byte-r bb-s-word)) (parallel (assign a-temp (32- bt-d-bitpos)) (assign b-temp obus)) (parallel-with-d-access bb-d-offset (assign byte-r bb-d-bitpos) (assign byte-s (1- a-temp)) (store-word (dpb bb-s-word byte-s byte-r memory-data))) ;; We have now done the first partial D word. Turn into the d-aligned ;; case, with the source advanced by one word from where it started. (incr-d-offset) (assign bb-s-offset bb-s-offset-ahead) (assign bb-s-bitpos (- b-temp-2 bt-d-bitpos)) (assign byte-r (32- bb-s-bitpos)) (assign bb-s-word2 (rotate bb-s-word2 byte-r)) (assign bb-width (- bb-width t-temp)) (parallel (assign bb-d-bitpos (b-constant 0)) (lisp (trace-patn #/e)) (jump ubitblt-d-aligned-row-source)))))))) (defucode ubitblt-aligned-row-source ;28 cycles per 8 words (if (greater-or-equal-fixnum bb-width (b-constant (* 8. 32.))) ;;Fetch a block of words into the buffer (sequential (assign b-temp (+ bb-s-offset (b-constant 8.))) (if (lesser-fixnum bb-s-row-length b-temp) (goto ubitblt-aligned-row-source-slow-loop) (sequential (parallel (assign-vma-offset s) (call ubitblt-block-read-8)) (parallel (assign-vma-offset d) (call ubitblt-block-write-8)) (parallel (assign bb-s-offset (+ bb-s-offset b-block-size)) (jump ubitblt-aligned-row-source))))) ;;Frob with whats left. Too bad dispatch blocks are expensive. (if (greater-or-equal-fixnum bb-width (b-constant (* 4 32.))) (sequential (assign b-temp (+ bb-s-offset (b-constant 4))) (if (lesser-fixnum bb-s-row-length b-temp) (goto ubitblt-aligned-row-source-slow-loop) (sequential (parallel (assign-vma-offset s) (call ubitblt-block-read-4)) (parallel (assign-vma-offset d) (call bitblt-block-write-4)) (parallel (assign bb-s-offset (+ bb-s-offset b-block-size)) (jump ubitblt-aligned-row-source-slow-loop))))) (goto ubitblt-aligned-row-source-slow-loop)))) (defucode ubitblt-aligned-row-source-slow-loop ;10 cycles per word (parallel-with-s-access bb-s-offset (trap-if (lesser-fixnum bb-width (b-constant 32.)) ubitblt-aligned-row-source-slow-loop-done) (waiting-for-memory) (assign bb-s-word (logxor bb-constant memory-data))) (assign-vma-offset d) ;1 (store-word bb-s-word) ;1 (assign bb-width (- bb-width (b-constant 32.))) ;1 (incr-wrap-s-offset) ;2 (parallel ;1 (incr-d-offset) (lisp (trace-path #/,)) (jump ubitblt-aligned-row-source-slow-loop))) ;Do last partial word, if any (defucode ubitblt-aligned-row-source-slow-loop-done (if (plus-fixnum bb-width) (sequential