[(rnrs bytevectors) implementation atsmyles@earthlink.net**20090501200740 This uses the bytevectors implementation from r6rs.org reference implementation. Made various changes as necessary. ] hunk ./compile-r6rs 24 -(define core-libraries (path-expand* "libs/core" "core.scm" "r5rs.scm" "records.scm" "hashtables.scm")) +(define core-libraries (path-expand* "libs/core" "core.scm" "r5rs.scm" "records.scm" "hashtables.scm" "bytevectors.scm")) hunk ./compile-r6rs 61 + hunk ./compile-r6rs 66 +(define scenary-files (list (path-expand "srfi-66.scm"))) + hunk ./compile-r6rs 70 -(apply compile-files-to-c stage-files) +(apply compile-files-to-c (append stage-files scenary-files)) hunk ./compile-r6rs 82 -(apply compile-r6rs-base (append stage-files play-files)) +(apply compile-r6rs-base (append stage-files scenary-files play-files)) hunk ./compile-r6rs-libs 25 - (path-expand* "libs/srfi/" "srfi-0.scm" "srfi-2.scm" "srfi-21.scm" "srfi-18.scm" "srfi-23.scm" "srfi-27.scm" "srfi-39.scm" "srfi-4.scm" "srfi-6.scm" "srfi-69.scm" "srfi-8.scm" "srfi-88.scm" "srfi-89.scm" "srfi-99.scm" "srfi-9.scm")) + (path-expand* "libs/srfi/" "srfi-0.scm" "srfi-2.scm" "srfi-21.scm" "srfi-18.scm" "srfi-23.scm" "srfi-27.scm" "srfi-39.scm" "srfi-4.scm" "srfi-6.scm" "srfi-66.scm" "srfi-69.scm" "srfi-8.scm" "srfi-88.scm" "srfi-89.scm" "srfi-99.scm" "srfi-9.scm")) hunk ./compile-r6rs-libs 28 - (path-expand* "libs/rnrs/" "exceptions.scm" "conditions.scm" "base.scm" "records.scm" "syntax-case.scm" "control.scm" "hashtables.scm" "lists.scm" "enumerations.scm" "arithmetic.scm" "bytevectors.scm" "eval.scm" "r5rs.scm" "mutable.scm")) + (path-expand* "libs/rnrs/" "exceptions.scm" "conditions.scm" "base.scm" "records.scm" "syntax-case.scm" "control.scm" "hashtables.scm" "lists.scm" "enumerations.scm" "arithmetic.scm" "eval.scm" "r5rs.scm" "mutable.scm")) hunk ./compile-r6rs-libs 33 +(define bytevector-libraries + (path-expand* "libs/rnrs/bytevectors" "bytevector-core.scm" "bytevector-proto.scm" "bytevector-ieee.scm" "bytevector-string.scm" "bytevector.scm")) + hunk ./compile-r6rs-libs 49 - (append gambit-libraries standard-libraries0 unicode-libraries standard-libraries srfi-libraries misc-libraries)) + (append gambit-libraries standard-libraries0 bytevector-libraries unicode-libraries standard-libraries srfi-libraries misc-libraries)) hunk ./docs/index.html 74 +
  • (rnrs bytevectors)
  • hunk ./docs/index.html 98 -
  • (rnrs bytevectors)
  • addfile ./libs/core/bytevectors.scm hunk ./libs/core/bytevectors.scm 1 +;This is a superset of srfi-66 +(library (core bytevectors) + (export u8vector? make-u8vector u8vector u8vector->list list->u8vector u8vector-length u8vector-ref u8vector-set! u8vector=? u8vector-compare u8vector-copy! u8vector-copy native-endianness u8vector-fill!) + (import (primitives u8vector? make-u8vector u8vector u8vector->list list->u8vector u8vector-length u8vector-ref u8vector-set! u8vector=? u8vector-compare u8vector-copy! u8vector-copy native-endianness u8vector-fill!))) + adddir ./libs/rnrs/bytevectors hunk ./libs/rnrs/bytevectors.scm 1 -(library (rnrs bytevectors) - (export - ;general options - bytevector=? bytevector-copy! endianness native-endianness - (rename (u8vector? bytevector?) (make-u8vector make-bytevector) (u8vector-length bytevector-length) (u8vector-fill! bytevector-fill!) (u8vector-copy bytevector-copy)) - ;operations on bytes and octets - bytevector-s8-ref bytevector-s8-set! - (rename (u8vector-ref bytevector-u8-ref) (u8vector-set! bytevector-u8-set!) (u8vector->list bytevector-u8list) (list->u8vector u8-list->bytevector)) - ;operations on integers of arbitrary size - bytevector-uint-ref bytevector-sint-ref bytevector-uint-set! bytevector-sint-set! bytevector->uint-list bytevector->sint-list uint-list->bytevector sint-list->bytevector - ;operations on 16 bit integers - bytevector-u16-ref bytevector-s16-ref bytevector-u16-native-ref bytevector-s16-native-ref bytevector-u16-set! bytevector-s16-set! bytevector-u16-native-set! bytevector-s16-native-set! - ;operations on 32 bit integers - bytevector-u32-ref bytevector-s32-ref bytevector-u32-native-ref bytevector-s32-native-ref bytevector-u32-set! bytevector-s32-set! bytevector-u32-native-set! bytevector-s32-native-set! - ;operations on 64 bit integers - bytevector-u64-ref bytevector-s64-ref bytevector-u64-native-ref bytevector-s64-native-ref bytevector-u64-set! bytevector-s64-set! bytevector-u64-native-set! bytevector-s64-native-set! - ;operations on IEEE-754 - bytevector-ieee-single-native-ref bytevector-ieee-single-ref bytevector-ieee-double-native-ref bytevector-ieee-double-ref bytevector-ieee-single-native-set! bytevector-ieee-single-set! bytevector-ieee-double-native-set! bytevector-ieee-double-set! - ;operations on strings - string->utf8 string->utf16 string->utf32 utf8->string utf16->string utf32->string) - (import - (rnrs base) - (rnrs control) - (gambit bytevectors) - (gambit io) - (gambit debug)) - - (define (endianness x) - (case x - ((big little native) x) - (else (error 'endianness "Endianness must be big, little or native")))) - - (define (native-endianness) 'native) - - (define (bytevector=? bytevector1 bytevector2) (equal? bytevector1 bytevector2)) - (define (bytevector-copy! source source-start target target-start k) (error 'bytevector-copy! "Not implemented")) - - (define (bytevector-s8-ref bytevector k) (error 'bytevector-s8-ref "Not implemented")) - - (define (bytevector-s8-set! bytevector k byte) (error 'bytevector-s8-set! "Not implemented")) - (define (bytevector-uint-ref bytevector k endianness size) (error 'bytevector-uint-ref "Not implemented")) - (define (bytevector-sint-ref bytevector k endianness size) (error 'bytevector-sint-ref "Not implemented")) - (define (bytevector-uint-set! bytevector k n endianness size) (error 'bytevector-uint-set! "Not implemented")) - (define (bytevector-sint-set! bytevector k n endianness size) (error 'bytevector-sint-set! "Not implemented")) - (define (bytevector->uint-list bytevector endianness size) (error 'bytevector->uint-list "Not implemented")) - (define (bytevector->sint-list bytevector endianness size) (error 'bytevector->sint-list "Not implemented")) - (define (uint-list->bytevector list endianness size) (error 'uint-list->bytevector "Not implemented")) - (define (sint-list->bytevector list endianness size) (error 'sint-list->bytevector "Not implemented")) - ;16bit - (define (bytevector-u16-ref bytevector k endianness) (bytevector-uint-ref bytevector k endianness 16)) - (define (bytevector-s16-ref bytevector k endianness) (bytevector-sint-ref bytevector k endianness 16)) - (define (bytevector-u16-native-ref bytevector k) (bytevector-uint-ref bytevector k (native-endianness) 16)) - (define (bytevector-s16-native-ref bytevector k) (bytevector-sint-ref bytevector k (native-endianness) 16)) - (define (bytevector-u16-set! bytevector k n endianness) (bytevector-uint-set! bytevector k n endianness 16)) - (define (bytevector-s16-set! bytevector k n endianness) (bytevector-sint-set! bytevector k n endianness 16)) - (define (bytevector-u16-native-set! bytevector k n) (bytevector-uint-set! bytevector k n (native-endianness) 16)) - (define (bytevector-s16-native-set! bytevector k n) (bytevector-sint-set! bytevector k n (native-endianness) 16)) - ;32bit - (define (bytevector-u32-ref bytevector k endianness) (bytevector-uint-ref bytevector k endianness 32)) - (define (bytevector-s32-ref bytevector k endianness) (bytevector-sint-ref bytevector k endianness 32)) - (define (bytevector-u32-native-ref bytevector k) (bytevector-uint-ref bytevector k (native-endianness) 32)) - (define (bytevector-s32-native-ref bytevector k) (bytevector-sint-ref bytevector k (native-endianness) 32)) - (define (bytevector-u32-set! bytevector k n endianness) (bytevector-uint-set! bytevector k n endianness 32)) - (define (bytevector-s32-set! bytevector k n endianness) (bytevector-sint-set! bytevector k n endianness 32)) - (define (bytevector-u32-native-set! bytevector k n) (bytevector-uint-set! bytevector k n (native-endianness) 32)) - (define (bytevector-s32-native-set! bytevector k n) (bytevector-sint-set! bytevector k n (native-endianness) 32)) - ;64bit - (define (bytevector-u64-ref bytevector k endianness) (bytevector-uint-ref bytevector k endianness 64)) - (define (bytevector-s64-ref bytevector k endianness) (bytevector-sint-ref bytevector k endianness 64)) - (define (bytevector-u64-native-ref bytevector k) (bytevector-uint-ref bytevector k (native-endianness) 64)) - (define (bytevector-s64-native-ref bytevector k) (bytevector-sint-ref bytevector k (native-endianness) 64)) - (define (bytevector-u64-set! bytevector k n endianness) (bytevector-uint-set! bytevector k n endianness 64)) - (define (bytevector-s64-set! bytevector k n endianness) (bytevector-sint-set! bytevector k n endianness 64)) - (define (bytevector-u64-native-set! bytevector k n) (bytevector-uint-set! bytevector k n (native-endianness) 64)) - (define (bytevector-s64-native-set! bytevector k n) (bytevector-sint-set! bytevector k n (native-endianness) 64)) - ;ieee-754 - (define (bytevector-ieee-single-native-ref bytevector k) (error 'bytevector-ieee-single-native-ref "Not implemented")) - (define (bytevector-ieee-single-ref bytevector k endianness) (error 'bytevector-ieee-single-ref "Not implemented")) - (define (bytevector-ieee-double-native-ref bytevector k) (error 'bytevector-ieee-double-native-ref "Not implemented")) - (define (bytevector-ieee-double-ref bytevector k endianness) (error 'bytevector-ieee-double-ref "Not implemented")) - (define (bytevector-ieee-single-native-set! bytevector k x) (error 'bytevector-ieee-single-native-set! "Not implemented")) - (define (bytevector-ieee-single-set! bytevector k x endianness) (error 'bytevector-ieee-single-set! "Not implemented")) - (define (bytevector-ieee-double-native-set! bytevector k x) (error 'bytevector-ieee-double-native-set! "Not implemented")) - (define (bytevector-ieee-double-set! bytevector k x endianness) (error 'bytevector-ieee-double-set! "Not implemented")) - ;strings - (define (string->utf8 string) - (call-with-output-u8vector '(char-encoding: UTF-8) - (lambda (p) - (display string p)))) - - (define (string->utf16 string . endian) - (call-with-output-u8vector - (if (null? endian) - '(char-encoding: UTF-16) - (case (endianness (car endian)) - ((big) '(char-encoding: UTF-16BE)) - ((little) '(char-encoding: UTF-16LE)) - (else '(char-encoding: UTF-16)))) - (lambda (p) - (display string p)))) - - (define (string->utf32 string . endian) - (call-with-output-u8vector - (if (null? endian) - '(char-encoding: UCS-4) - (case (endianness (car endian)) - ((big) '(char-encoding: UCS-4BE)) - ((little) '(char-encoding: UCS-4LE)) - (else '(char-encoding: UCS-4)))) - (lambda (p) - (display string p)))) - - (define (utf8->string bytevector) - (define buffer-size 512) - (call-with-input-u8vector `(init: ,bytevector char-encoding: UTF-8 permanent-close: #t) - (lambda (in) - (define out (open-output-string '())) - (define buffer (make-string buffer-size)) - (do ((i (read-substring buffer 0 buffer-size in) (read-substring buffer 0 buffer-size in))) - ((< i buffer-size) - (write-substring buffer 0 i out) - (display "getting output") - (force-output out) - (close-port out) - (get-output-string out)) - (pp i) - (pp buffer) - (write-substring buffer 0 i out))))) - - ;fixme - (define (utf16->string bytevector endianness . manditory) - (with-input-from-u8vector (list init: bytevector char-encoding: "UTF-16") - (read-line (current-input-port) #f))) - - ;fixme - (define (utf32->string bytevector endianness . manditory) - (with-input-from-u8vector (list init: bytevector char-encoding: "UCS-4") - (read-line (current-input-port) #f))) -) rmfile ./libs/rnrs/bytevectors.scm addfile ./libs/rnrs/bytevectors/bytevector-core.scm hunk ./libs/rnrs/bytevectors/bytevector-core.scm 1 +; Bytevectors + +; Copyright (C) Michael Sperber (2005). All Rights Reserved. +; +; Permission is hereby granted, free of charge, to any person +; obtaining a copy of this software and associated documentation files +; (the "Software"), to deal in the Software without restriction, +; including without limitation the rights to use, copy, modify, merge, +; publish, distribute, sublicense, and/or sell copies of the Software, +; and to permit persons to whom the Software is furnished to do so, +; subject to the following conditions: +; +; The above copyright notice and this permission notice shall be +; included in all copies or substantial portions of the Software. +; +; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +; BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +; ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +; CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +; SOFTWARE. + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Modified for Larceny by William D Clinger, beginning 2 August 2006. +; +; This file is Larceny-specific: it uses bytevectors. +; +; Although bytevectors could be implemented as records +; that encapsulate a vector, that representation would +; require 4 to 8 times as much space as a native representation, +; and I believe most systems already have something analogous to +; bytevectors. It therefore seems reasonable to expect +; implementors to rewrite the core operations that are defined +; in this file. +; +; For a big-endian implementation, the other files should work +; as is once the core operations of this file are implemented. +; +; If the (native-endianness) is to be little, then some parts +; of the bytes-ieee.sch file will have to be converted. +; + +(library (rnrs bytevectors core) + (export native-endianness + (rename (u8vector? bytevector?) (make-u8vector make-bytevector) (u8vector-length bytevector-length) (u8vector-ref bytevector-u8-ref) (u8vector-set! bytevector-u8-set!) (quotient bytevector:div) (remainder bytevector:mod))) + (import (r5rs) (core bytevectors)) +) addfile ./libs/rnrs/bytevectors/bytevector-ieee.scm hunk ./libs/rnrs/bytevectors/bytevector-ieee.scm 1 +; Copyright 2006 William D Clinger. +; +; Permission to copy this software, in whole or in part, to use this +; software for any lawful purpose, and to redistribute this software +; is granted subject to the restriction that all copies made of this +; software must include this copyright notice in full. +; +; I also request that you send me a copy of any improvements that you +; make to this software so that they may be incorporated within it to +; the benefit of the Scheme community. +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; (bytevector-ieee) +; +; This file defines the operations of (r6rs bytevector) that +; have to do with IEEE-754 single and double precision +; floating point numbers. +; +; The definitions in this file should work in systems +; that use IEEE-754 double precision to represent inexact +; reals, and will probably work or come close to working +; with other floating point representations. Although +; they assume big-endian is the native representation, +; as in bvec-proto.sch, none of the code in this file +; depends upon the bit-level representation of inexact +; reals. (The code *does* depend upon the bit-level +; representation of IEEE-754 single and double precision, +; but that is an entirely different matter.) +; +; The representation-independent definitions in this file +; are far less efficient than a representation-dependent +; implementation would be. For reasonable efficiency, +; the following procedures should be redefined: +; +; bytevector-ieee-single-native-ref +; bytevector-ieee-double-native-ref +; bytevector-ieee-single-native-set! +; bytevector-ieee-double-native-set! +; +; Since those four procedures are easier to implement in +; machine language, it did not seem worthwhile to try to +; optimize their semi-portable definitions in this file. +; +; To simplify bootstrapping, this file uses R5RS arithmetic +; instead of R6RS fixnum and flonum operations. +; +; Known bugs: +; +; The -set! procedures perform double rounding for +; denormalized numbers. Redefining the -native-set! +; procedures in machine language will eliminate those +; bugs. +; + +(library (rnrs bytevectors ieee) + + (export bytevector-ieee-single-native-ref bytevector-ieee-single-ref + bytevector-ieee-double-native-ref bytevector-ieee-double-ref + bytevector-ieee-single-native-set! bytevector-ieee-single-set! + bytevector-ieee-double-native-set! bytevector-ieee-double-set!) + + (import (rnrs base) (rnrs control) (r5rs) + (rnrs bytevectors core) (rnrs bytevectors proto)) + + +(define (unspecified) (if #f #f)) + +; FIXME: these definitions are temporary, in case the +; infinite? and nan? procedures aren't yet in a system's +; preliminary version of (r6rs base). + +(define (bytevector:nan? x) + (and (real? x) + (not (= x x)))) + +(define (bytevector:infinite? x) + (and (real? x) + (not (bytevector:nan? x)) + (bytevector:nan? (- x x)))) + +; Magic numbers for IEEE-754 single and double precision: +; the exponent bias (127 or 1023) +; the integer value of the hidden bit (2^23 or 2^52) + +(define bytevector:single-maxexponent 255) +(define bytevector:single-bias (bytevector:div bytevector:single-maxexponent 2)) +(define bytevector:single-hidden-bit 8388608) + +(define bytevector:double-maxexponent 2047) +(define bytevector:double-bias (bytevector:div bytevector:double-maxexponent 2)) +(define bytevector:double-hidden-bit 4503599627370496) + +; Given four exact integers, returns +; +; (-1)^sign * (2^exponent) * p/q +; +; as an inexact real. + +(define (bytevector:normalized sign exponent p q) + (let* ((p/q (exact->inexact (/ p q))) + (x (* p/q (expt 2.0 exponent)))) + (cond ((= sign 0) x) + ((= x 0.0) -0.0) + (else (- x))))) + +; Given exact positive integers p and q, +; returns three values: +; exact integers exponent, p2, and q2 such that +; q2 <= p2 < q2+q2 +; p / q = (p2 * 2^exponent) / q2 + +(define (bytevector:normalized-ieee-parts p q) + (cond ((< p q) + (do ((p p (+ p p)) + (e 0 (- e 1))) + ((>= p q) + (values e p q)))) + ((<= (+ q q) p) + (do ((q q (+ q q)) + (e 0 (+ e 1))) + ((< p (+ q q)) + (values e p q)))) + (else + (values 0 p q)))) + +; Given an inexact real x, an exponent bias, and an exact positive +; integer q that is a power of 2 representing the integer value of +; the hidden bit, returns three exact integers: +; +; sign +; biased-exponent +; p +; +; If x is normalized, then 0 < biased-exponent <= bias+bias, +; q <= p < 2*q, and +; +; x = (-1)^sign * (2^(biased-exponent - bias)) * p/q +; +; If x is denormalized, then p < q and the equation holds. +; If x is zero, then biased-exponent and p are zero. +; If x is infinity, then biased-exponent = bias+bias+1 and p=0. +; If x is a NaN, then biased-exponent = bias+bias+1 and p>0. +; + +(define (bytevector:ieee-parts x bias q) + (cond ((bytevector:nan? x) + (values 0 (+ bias bias 1) (- q 1))) + ((bytevector:infinite? x) + (values (if (positive? x) 0 1) (+ bias bias 1) 0)) + ((zero? x) + (values (if (eqv? x -0.0) 1 0) 0 0)) + (else + (let* ((sign (if (negative? x) 1 0)) + (y (inexact->exact (abs x))) + (num (numerator y)) + (den (denominator y))) + (call-with-values + (lambda () (bytevector:normalized-ieee-parts num den)) + (lambda (exponent num den) + (let ((biased-exponent (+ exponent bias))) + (cond ((< 0 biased-exponent (+ bias bias 1)) + ; within the range of normalized numbers + (if (<= den q) + (let* ((factor (/ q den)) + (num*factor (* num factor))) + (if (integer? factor) + (values sign biased-exponent num*factor) + (error 'bytevector:ieee-parts + "this shouldn't happen: " x bias q))) + (let* ((factor (/ den q)) + (num*factor (/ num factor))) + (values sign + biased-exponent + (round num*factor))))) + ((>= biased-exponent (+ bias bias 1)) + ; infinity + (values (if (positive? x) 0 1) (+ bias bias 1) 0)) + (else + ; denormalized + ; FIXME: this has the double rounding bug + (do ((biased biased-exponent (+ biased 1)) + (num (round (/ (* q num) den)) + (round (bytevector:div num 2)))) + ((and (< num q) (= biased 1)) + (values sign biased num)))))))))))) + +; The exported procedures + +(define (bytevector-ieee-single-native-ref bytevector k) + (let ((b0 (bytevector-u8-ref bytevector k)) + (b1 (bytevector-u8-ref bytevector (+ k 1))) + (b2 (bytevector-u8-ref bytevector (+ k 2))) + (b3 (bytevector-u8-ref bytevector (+ k 3)))) + (let ((sign (bytevector:div b0 128)) + (exponent (+ (* 2 (bytevector:mod b0 128)) + (bytevector:div b1 128))) + (fraction (+ (* 256 256 (bytevector:mod b1 128)) + (* 256 b2) + b3))) + (cond ((< 0 exponent bytevector:single-maxexponent) + ; normalized (the usual case) + (bytevector:normalized sign + (- exponent bytevector:single-bias) + (+ bytevector:single-hidden-bit fraction) + bytevector:single-hidden-bit)) + ((= 0 exponent) + (cond ((> fraction 0) + ; denormalized + (bytevector:normalized sign + (+ (- bytevector:single-bias) 1) + fraction + bytevector:single-hidden-bit)) + ((= sign 0) 0.0) + (else -0.0))) + ((= 0 fraction) + (if (= sign 0) +inf.0 -inf.0)) + (else + (if (= sign 0) +nan.0 +nan.0)))))) + +(define (bytevector-ieee-double-native-ref bytevector k) + (let ((b0 (bytevector-u8-ref bytevector k)) + (b1 (bytevector-u8-ref bytevector (+ k 1))) + (b2 (bytevector-u8-ref bytevector (+ k 2)))) + (let ((sign (bytevector:div b0 128)) + (exponent (+ (* 16 (bytevector:mod b0 128)) + (bytevector:div b1 16))) + (fraction (+ (* 281474976710656 (bytevector:mod b1 16)) + (bytevector-uint-ref bytevector (+ k 2) 'big 6)))) + (cond ((< 0 exponent bytevector:double-maxexponent) + ; normalized (the usual case) + (bytevector:normalized sign + (- exponent bytevector:double-bias) + (+ bytevector:double-hidden-bit fraction) + bytevector:double-hidden-bit)) + ((= 0 exponent) + (cond ((> fraction 0) + ; denormalized + (bytevector:normalized sign + (+ (- bytevector:double-bias) 1) + fraction + bytevector:double-hidden-bit)) + ((= sign 0) 0.0) + (else -0.0))) + ((= 0 fraction) + (if (= sign 0) +inf.0 -inf.0)) + (else + (if (= sign 0) +nan.0 +nan.0)))))) + +(define (bytevector-ieee-single-ref bytevector k endianness) + (if (eq? endianness 'big) + (if (= 0 (bytevector:mod k 4)) + (bytevector-ieee-single-native-ref bytevector k) + (let ((b (make-bytevector 4))) + (bytevector-copy! bytevector k b 0 4) + (bytevector-ieee-single-native-ref b 0))) + (let ((b (make-bytevector 4))) + (bytevector-u8-set! b 0 (bytevector-u8-ref bytevector (+ k 3))) + (bytevector-u8-set! b 1 (bytevector-u8-ref bytevector (+ k 2))) + (bytevector-u8-set! b 2 (bytevector-u8-ref bytevector (+ k 1))) + (bytevector-u8-set! b 3 (bytevector-u8-ref bytevector k)) + (bytevector-ieee-single-native-ref b 0)))) + +(define (bytevector-ieee-double-ref bytevector k endianness) + (if (eq? endianness 'big) + (if (= 0 (bytevector:mod k 8)) + (bytevector-ieee-double-native-ref bytevector k) + (let ((b (make-bytevector 8))) + (bytevector-copy! bytevector k b 0 8) + (bytevector-ieee-double-native-ref b 0))) + (let ((b (make-bytevector 8))) + (bytevector-u8-set! b 0 (bytevector-u8-ref bytevector (+ k 7))) + (bytevector-u8-set! b 1 (bytevector-u8-ref bytevector (+ k 6))) + (bytevector-u8-set! b 2 (bytevector-u8-ref bytevector (+ k 5))) + (bytevector-u8-set! b 3 (bytevector-u8-ref bytevector (+ k 4))) + (bytevector-u8-set! b 4 (bytevector-u8-ref bytevector (+ k 3))) + (bytevector-u8-set! b 5 (bytevector-u8-ref bytevector (+ k 2))) + (bytevector-u8-set! b 6 (bytevector-u8-ref bytevector (+ k 1))) + (bytevector-u8-set! b 7 (bytevector-u8-ref bytevector k)) + (bytevector-ieee-double-native-ref b 0)))) + +(define (bytevector-ieee-single-native-set! bytevector k x) + (call-with-values + (lambda () (bytevector:ieee-parts x bytevector:single-bias bytevector:single-hidden-bit)) + (lambda (sign biased-exponent frac) + (define (store! sign biased-exponent frac) + (bytevector-u8-set! bytevector k + (+ (* 128 sign) (bytevector:div biased-exponent 2))) + (bytevector-u8-set! bytevector (+ k 1) + (+ (* 128 (bytevector:mod biased-exponent 2)) + (bytevector:div frac (* 256 256)))) + (bytevector-u8-set! bytevector (+ k 2) + (bytevector:div (bytevector:mod frac (* 256 256)) 256)) + (bytevector-u8-set! bytevector (+ k 3) + (bytevector:mod frac 256)) + (unspecified)) + (cond ((= biased-exponent bytevector:single-maxexponent) + (store! sign biased-exponent frac)) + ((< frac bytevector:single-hidden-bit) + (store! sign 0 frac)) + (else + (store! sign biased-exponent (- frac bytevector:single-hidden-bit))))))) + +(define (bytevector-ieee-double-native-set! bytevector k x) + (call-with-values + (lambda () + (bytevector:ieee-parts x bytevector:double-bias + bytevector:double-hidden-bit)) + (lambda (sign biased-exponent frac) + (define (store! sign biased-exponent frac) + (bytevector-u8-set! bytevector k + (+ (* 128 sign) + (bytevector:div biased-exponent 16))) + (bytevector-u8-set! bytevector (+ k 1) + (+ (* 16 (bytevector:mod biased-exponent 16)) + (bytevector:div frac (* 65536 65536 65536)))) + (bytevector-u16-native-set! bytevector (+ k 2) + (bytevector:div (bytevector:mod frac (* 65536 65536 65536)) + (* 65536 65536))) + (bytevector-u32-native-set! bytevector (+ k 4) + (bytevector:mod frac (* 65536 65536))) + (unspecified)) + (cond ((= biased-exponent bytevector:double-maxexponent) + (store! sign biased-exponent frac)) + ((< frac bytevector:double-hidden-bit) + (store! sign 0 frac)) + (else + (store! sign biased-exponent (- frac bytevector:double-hidden-bit))))))) + +(define (bytevector-ieee-single-set! bytevector k x endianness) + (if (eq? endianness 'big) + (if (= 0 (bytevector:mod k 4)) + (bytevector-ieee-single-native-set! bytevector k x) + (let ((b (make-bytevector 4))) + (bytevector-ieee-single-native-set! b 0 x) + (bytevector-copy! b 0 bytevector k 4))) + (let ((b (make-bytevector 4))) + (bytevector-ieee-single-native-set! b 0 x) + (bytevector-u8-set! bytevector k (bytevector-u8-ref b 3)) + (bytevector-u8-set! bytevector (+ k 1) (bytevector-u8-ref b 2)) + (bytevector-u8-set! bytevector (+ k 2) (bytevector-u8-ref b 1)) + (bytevector-u8-set! bytevector (+ k 3) (bytevector-u8-ref b 0))))) + +(define (bytevector-ieee-double-set! bytevector k x endianness) + (if (eq? endianness 'big) + (if (= 0 (bytevector:mod k 8)) + (bytevector-ieee-double-native-set! bytevector k x) + (let ((b (make-bytevector 8))) + (bytevector-ieee-double-native-set! b 0 x) + (bytevector-copy! b 0 bytevector k 8))) + (let ((b (make-bytevector 8))) + (bytevector-ieee-double-native-set! b 0 x) + (bytevector-u8-set! bytevector k (bytevector-u8-ref b 7)) + (bytevector-u8-set! bytevector (+ k 1) (bytevector-u8-ref b 6)) + (bytevector-u8-set! bytevector (+ k 2) (bytevector-u8-ref b 5)) + (bytevector-u8-set! bytevector (+ k 3) (bytevector-u8-ref b 4)) + (bytevector-u8-set! bytevector (+ k 4) (bytevector-u8-ref b 3)) + (bytevector-u8-set! bytevector (+ k 5) (bytevector-u8-ref b 2)) + (bytevector-u8-set! bytevector (+ k 6) (bytevector-u8-ref b 1)) + (bytevector-u8-set! bytevector (+ k 7) (bytevector-u8-ref b 0))))) + +) addfile ./libs/rnrs/bytevectors/bytevector-proto.scm hunk ./libs/rnrs/bytevectors/bytevector-proto.scm 1 +; Bytevectors + +; Copyright (C) Michael Sperber (2005). All Rights Reserved. +; +; Permission is hereby granted, free of charge, to any person +; obtaining a copy of this software and associated documentation files +; (the "Software"), to deal in the Software without restriction, +; including without limitation the rights to use, copy, modify, merge, +; publish, distribute, sublicense, and/or sell copies of the Software, +; and to permit persons to whom the Software is furnished to do so, +; subject to the following conditions: +; +; The above copyright notice and this permission notice shall be +; included in all copies or substantial portions of the Software. +; +; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +; BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +; ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +; CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +; SOFTWARE. + +; Modified by William D Clinger, beginning 2 August 2006. +; +; Calls to many of these procedures should be compiled +; into a short sequence of machine instructions. +; Many of the definitions below could be made faster +; by inlining help procedures and unrolling loops, +; but that would not be as fast as generating machine +; code. +; +; This file defines all of the operations on bytes objects +; except for those defined in bytevector-core.sch and in +; bytevector-ieee.sch. +; +; ATS +; Modified with (core bytevectors) which is srfi-66 plus extensions + +(library (rnrs bytevectors proto) + (export endianness + + ; The next few exports come from bytevector-core. + + native-endianness + bytevector? make-bytevector bytevector-length + bytevector-u8-ref bytevector-s8-ref + bytevector-u8-set! bytevector-s8-set! + + ; The remaining exports are defined in this file. + + bytevector=? bytevector-fill! + bytevector-copy! bytevector-copy + bytevector->u8-list u8-list->bytevector + bytevector-uint-ref bytevector-sint-ref + bytevector-uint-set! bytevector-sint-set! + bytevector->uint-list bytevector->sint-list + uint-list->bytevector sint-list->bytevector + bytevector-u16-ref bytevector-s16-ref + bytevector-u16-set! bytevector-s16-set! + bytevector-u16-native-ref bytevector-s16-native-ref + bytevector-u16-native-set! bytevector-s16-native-set! + bytevector-u32-ref bytevector-s32-ref + bytevector-u32-set! bytevector-s32-set! + bytevector-u32-native-ref bytevector-s32-native-ref + bytevector-u32-native-set! bytevector-s32-native-set! + bytevector-u64-ref bytevector-s64-ref + bytevector-u64-set! bytevector-s64-set! + bytevector-u64-native-ref bytevector-s64-native-ref + bytevector-u64-native-set! bytevector-s64-native-set! +) + + (import (rnrs base) (rnrs control) (core bytevectors) (rnrs bytevectors core)) + +; Help procedures; not exported. + +(define (u8->s8 octet) + (if (> octet 127) + (- octet 256) + octet)) + +(define (s8->u8 val) + (if (negative? val) + (+ val 256) + val)) + +(define (make-uint-ref size) + (lambda (bytevector k endianness) + (bytevector-uint-ref bytevector k endianness size))) + +(define (make-sint-ref size) + (lambda (bytevector k endianness) + (bytevector-sint-ref bytevector k endianness size))) + +(define (make-uint-set! size) + (lambda (bytevector k n endianness) + (bytevector-uint-set! bytevector k n endianness size))) + +(define (make-sint-set! size) + (lambda (bytevector k n endianness) + (bytevector-sint-set! bytevector k n endianness size))) + +(define (make-ref/native base base-ref) + (lambda (bytevector index) + (ensure-aligned index base) + (base-ref bytevector index (native-endianness)))) + +(define (make-set!/native base base-set!) + (lambda (bytevector index val) + (ensure-aligned index base) + (base-set! bytevector index val (native-endianness)))) + +(define (ensure-aligned index base) + (if (not (zero? (bytevector:mod index base))) + (error #f "non-aligned bytevector access" index base))) + +(define (make-bytevector->int-list bytevector-ref) + (lambda (b endness size) + (let ((ref (lambda (i) (bytevector-ref b i endness size))) + (length (bytevector-length b))) + (let loop ((i 0) (r '())) + (if (>= i length) + (reverse r) + (loop (+ i size) + (cons (ref i) r))))))) + +(define (make-int-list->bytevector bytevector-set!) + (lambda (l endness size) + (let* ((bytevector (make-bytevector (* size (length l)))) + (setter! (lambda (i n) + (bytevector-set! bytevector i n endness size)))) + (let loop ((i 0) (l l)) + (if (null? l) + bytevector + (begin + (setter! i (car l)) + (loop (+ i size) (cdr l)))))))) + +; Exported syntax and procedures. + +(define-syntax endianness + (syntax-rules (little big) + ((endianness little) 'little) + ((endianness big) 'big))) + +(define (bytevector-s8-ref b k) + (u8->s8 (bytevector-u8-ref b k))) + +(define (bytevector-s8-set! b k val) + (bytevector-u8-set! b k (s8->u8 val))) + +(define (bytevector-uint-ref bytevector index endness size) + (case endness + ((big) + (do ((i 0 (+ i 1)) + (result 0 (+ (* 256 result) + (bytevector-u8-ref bytevector (+ index i))))) + ((>= i size) + result))) + ((little) + (do ((i (- size 1) (- i 1)) + (result 0 (+ (* 256 result) + (bytevector-u8-ref bytevector (+ index i))))) + ((< i 0) + result))) + (else + (error 'bytevector-uint-ref "Invalid endianness: " endness)))) + +(define (bytevector-sint-ref bytevector index endness size) + (let* ((high-byte (bytevector-u8-ref bytevector + (if (eq? endness 'big) + index + (+ index size -1)))) + (uresult (bytevector-uint-ref bytevector index endness size))) + (if (> high-byte 127) + (- uresult (expt 256 size)) + uresult))) + +; FIXME: Some of these procedures may not do enough range checking. + +(define (bytevector-uint-set! bytevector index val endness size) + (case endness + ((little) + (do ((i 0 (+ i 1)) + (val val (bytevector:div val 256))) + ((>= i size)) + (bytevector-u8-set! bytevector (+ index i) (bytevector:mod val 256)))) + ((big) + (do ((i (- size 1) (- i 1)) + (val val (bytevector:div val 256))) + ((< i 0)) + (bytevector-u8-set! bytevector (+ index i) (bytevector:mod val 256)))) + (else + (error 'bytevector-uint-set! "Invalid endianness: " endness)))) + +(define (bytevector-sint-set! bytevector index val endness size) + (let ((uval (if (< val 0) + (+ val (* 128 (expt 256 (- size 1)))) + val))) + (bytevector-uint-set! bytevector index uval endness size))) + +(define bytevector-u16-ref (make-uint-ref 2)) +(define bytevector-u16-set! (make-uint-set! 2)) +(define bytevector-s16-ref (make-sint-ref 2)) +(define bytevector-s16-set! (make-sint-set! 2)) +(define bytevector-u16-native-ref (make-ref/native 2 bytevector-u16-ref)) +(define bytevector-u16-native-set! (make-set!/native 2 bytevector-u16-set!)) +(define bytevector-s16-native-ref (make-ref/native 2 bytevector-s16-ref)) +(define bytevector-s16-native-set! (make-set!/native 2 bytevector-s16-set!)) + +(define bytevector-u32-ref (make-uint-ref 4)) +(define bytevector-u32-set! (make-uint-set! 4)) +(define bytevector-s32-ref (make-sint-ref 4)) +(define bytevector-s32-set! (make-sint-set! 4)) +(define bytevector-u32-native-ref (make-ref/native 4 bytevector-u32-ref)) +(define bytevector-u32-native-set! (make-set!/native 4 bytevector-u32-set!)) +(define bytevector-s32-native-ref (make-ref/native 4 bytevector-s32-ref)) +(define bytevector-s32-native-set! (make-set!/native 4 bytevector-s32-set!)) + +(define bytevector-u64-ref (make-uint-ref 8)) +(define bytevector-u64-set! (make-uint-set! 8)) +(define bytevector-s64-ref (make-sint-ref 8)) +(define bytevector-s64-set! (make-sint-set! 8)) +(define bytevector-u64-native-ref (make-ref/native 8 bytevector-u64-ref)) +(define bytevector-u64-native-set! (make-set!/native 8 bytevector-u64-set!)) +(define bytevector-s64-native-ref (make-ref/native 8 bytevector-s64-ref)) +(define bytevector-s64-native-set! (make-set!/native 8 bytevector-s64-set!)) + +(define bytevector=? u8vector=?) +(define bytevector-fill! u8vector-fill!) +(define bytevector-copy! u8vector-copy!) +(define bytevector-copy u8vector-copy) +(define bytevector->u8-list u8vector->list) + +(define (bytevector->s8-list b) + (let ((n (bytevector-length b))) + (do ((i (- n 1) (- i 1)) + (result '() (cons (bytevector-s8-ref b i) result))) + ((< i 0) + result)))) + +(define u8-list->bytevector list->u8vector) + +(define (s8-list->bytevector l) + (let* ((n (length l)) + (b (make-bytevector n))) + (do ((vals l (cdr vals)) + (i 0 (+ i 1))) + ((null? vals)) + (bytevector-s8-set! b i (car vals))) + b)) + +(define bytevector->uint-list (make-bytevector->int-list bytevector-uint-ref)) +(define bytevector->sint-list (make-bytevector->int-list bytevector-sint-ref)) + +(define uint-list->bytevector (make-int-list->bytevector bytevector-uint-set!)) +(define sint-list->bytevector (make-int-list->bytevector bytevector-sint-set!)) + + +) addfile ./libs/rnrs/bytevectors/bytevector-string.scm hunk ./libs/rnrs/bytevectors/bytevector-string.scm 1 +; Copyright 2007 William D Clinger. +; +; Permission to copy this software, in whole or in part, to use this +; software for any lawful purpose, and to redistribute this software +; is granted subject to the restriction that all copies made of this +; software must include this copyright notice in full. +; +; I also request that you send me a copy of any improvements that you +; make to this software so that they may be incorporated within it to +; the benefit of the Scheme community. +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; (bytevector-string) +; +; This file defines the operations of (r6rs bytevector) that +; convert strings to bytevectors and vice versa. +; +; To simplify bootstrapping, to improve efficiency, and to +; avoid some potential problems with byte order marks, this +; implementation does not rely on (r6rs i/o ports). +; +; This code often uses bytevector-u8-X operations in place of +; bytevector-u16-X operations with an endianness argument, on +; the possibly incorrect theory that adding an adjustment is +; faster than dispatching on a symbol. + +(library (rnrs bytevectors string) + + (export string->utf8 string->utf16 string->utf32 + utf8->string utf16->string utf32->string) + + (import (rnrs base) (rnrs mutable-strings) (rnrs control) (rnrs arithmetic bitwise) (rnrs bytevectors core) (rnrs bytevectors proto) (only (r5rs) quotient remainder)) + +(define (string->utf8 string) + (let* ((n (string-length string)) + (k (do ((i 0 (+ i 1)) + (k 0 (+ k (let ((sv (char->integer (string-ref string i)))) + (cond ((<= sv #x007f) 1) + ((<= sv #x07ff) 2) + ((<= sv #xffff) 3) + (else 4)))))) + ((= i n) k))) + (bv (make-bytevector k))) + (define (loop i j) + (if (= i n) + bv + (let ((sv (char->integer (string-ref string i)))) + (cond ((<= sv #x007f) + (bytevector-u8-set! bv j sv) + (loop (+ i 1) (+ j 1))) + ((<= sv #x07ff) + (let ((u0 (bitwise-ior #b11000000 + (bitwise-bit-field sv 6 11))) + (u1 (bitwise-ior #b10000000 + (bitwise-bit-field sv 0 6)))) + (bytevector-u8-set! bv j u0) + (bytevector-u8-set! bv (+ j 1) u1) + (loop (+ i 1) (+ j 2)))) + ((<= sv #xffff) + (let ((u0 (bitwise-ior #b11100000 + (bitwise-bit-field sv 12 16))) + (u1 (bitwise-ior #b10000000 + (bitwise-bit-field sv 6 12))) + (u2 (bitwise-ior #b10000000 + (bitwise-bit-field sv 0 6)))) + (bytevector-u8-set! bv j u0) + (bytevector-u8-set! bv (+ j 1) u1) + (bytevector-u8-set! bv (+ j 2) u2) + (loop (+ i 1) (+ j 3)))) + (else + (let ((u0 (bitwise-ior #b11110000 + (bitwise-bit-field sv 18 21))) + (u1 (bitwise-ior #b10000000 + (bitwise-bit-field sv 12 18))) + (u2 (bitwise-ior #b10000000 + (bitwise-bit-field sv 6 12))) + (u3 (bitwise-ior #b10000000 + (bitwise-bit-field sv 0 6)))) + (bytevector-u8-set! bv j u0) + (bytevector-u8-set! bv (+ j 1) u1) + (bytevector-u8-set! bv (+ j 2) u2) + (bytevector-u8-set! bv (+ j 3) u3) + (loop (+ i 1) (+ j 4)))))))) + (loop 0 0))) + +; Given a bytevector containing the UTF-8 encoding +; of a string, decodes and returns a newly allocated +; string (unless empty). +; +; If the bytevector begins with the three-byte sequence +; #xef #xbb #xbf, then those bytes are ignored. (They +; are conventionally used as a signature to indicate +; UTF-8 encoding. The string->utf8 procedure does not +; emit those bytes, but UTF-8 encodings produced by +; other sources may contain them.) +; +; The main difficulty is that Unicode Corrigendum #1 +; ( http://unicode.org/versions/corrigendum1.html ) +; forbids interpretation of illegal code unit sequences, +; which include non-shortest forms. A UTF-8 decoder +; must therefore detect non-shortest forms and treat +; them as errors. +; +; Another difficulty is that the specification of this +; particular decoder says it will replace an illegal +; code unit sequence by a replacement character, but +; does not fully specify the recovery process, which +; affects the number of replacement characters that +; will appear in the result. +; +; Ignoring the special treatment of a ZERO WIDTH +; NO-BREAK SPACE at the beginning of a bytevector, +; the decoding is implemented by the following +; state machine. q0 is the start state and the +; only state in which no more input is acceptable. +; +; q0 --- dispatching on the first byte of a character +; Dispatch on the next byte according to Table 3.1B +; of Corrigendum #1. Note that there are two error +; ranges not shown in that table, for a total of 9. +; The 00..7f, 80..c1, and f5..ff ranges remain in +; state q0. 00..7f is an Ascii character; the other +; two ranges that remain in state q0 are illegal. +; +; q1 --- expecting one more byte in range 80..bf +; +; q2 --- expecting two more bytes, the first in range lower..bf +; +; q3 --- expecting three more bytes, the first in range lower..upper + +(define (utf8->string bv) + (let* ((n (bytevector-length bv)) + (replacement-character (integer->char #xfffd)) + (bits->char (lambda (bits) + (cond ((<= 0 bits #xd7ff) + (integer->char bits)) + ((<= #xe000 bits #x10ffff) + (integer->char bits)) + (else + replacement-character)))) + (begins-with-bom? + (and (<= 3 n) + (= #xef (bytevector-u8-ref bv 0)) + (= #xbb (bytevector-u8-ref bv 1)) + (= #xbf (bytevector-u8-ref bv 2))))) + + (define (result-length) + ; i is index of the next byte + ; k is the number of characters encoded by bytes 0 through i-1 + (define (q0 i k) + (if (= i n) + k + (let ((unit (bytevector-u8-ref bv i)) + (i1 (+ i 1)) + (k1 (+ k 1))) + (cond ((<= unit #x7f) + (q0 i1 k1)) + ((<= unit #xc1) + ; illegal + (q0 i1 k1)) + ((<= unit #xdf) + (q1 i1 k1)) + ((<= unit #xe0) + (q2 i1 k1 #xa0)) + ((<= unit #xef) + (q2 i1 k1 #x80)) + ((<= unit #xf0) + (q3 i1 k1 #x90 #xbf)) + ((<= unit #xf3) + (q3 i1 k1 #x80 #xbf)) + ((<= unit #xf4) + (q3 i1 k1 #x80 #x8f)) + (else + ; illegal + (q0 i1 k1)))))) + (define (q1 i k) + (if (= i n) + k + (let ((unit (bytevector-u8-ref bv i)) + (i1 (+ i 1))) + (cond ((< unit #x80) + ; illegal + (q0 i k)) + ((<= unit #xbf) + (q0 i1 k)) + (else + ; illegal + (q0 i k)))))) + (define (q2 i k lower) + (if (= i n) + k + (let ((unit (bytevector-u8-ref bv i)) + (i1 (+ i 1))) + (cond ((< unit lower) + ; illegal + (q0 i k)) + ((<= unit #xbf) + (q1 i1 k)) + (else + ; illegal + (q0 i k)))))) + (define (q3 i k lower upper) + (if (= i n) + k + (let ((unit (bytevector-u8-ref bv i)) + (i1 (+ i 1))) + (cond ((< unit lower) + ; illegal + (q0 i k)) + ((<= unit upper) + (q2 i1 k #x80)) + (else + ; illegal + (q0 i k)))))) + (if begins-with-bom? + (q0 3 0) + (q0 0 0))) + + (let* ((k (result-length)) + (s (make-string k))) + + ; i is index of the next byte in bv + ; k is index of the next character in s + + (define (q0 i k) + (if (< i n) + (let ((unit (bytevector-u8-ref bv i)) + (i1 (+ i 1)) + (k1 (+ k 1))) + (cond ((<= unit #x7f) + (string-set! s k (integer->char unit)) + (q0 i1 k1)) + ((<= unit #xc1) + ; illegal + (string-set! s k replacement-character) + (q0 i1 k1)) + ((<= unit #xdf) + (q1 i1 k (bitwise-and unit #x1f))) + ((<= unit #xe0) + (q2 i1 k #xa0 0)) + ((<= unit #xef) + (q2 i1 k #x80 (bitwise-and unit #x0f))) + ((<= unit #xf0) + (q3 i1 k #x90 #xbf 0)) + ((<= unit #xf3) + (q3 i1 k #x80 #xbf (bitwise-and unit #x07))) + ((<= unit #xf4) + (q3 i1 k #x80 #x8f (bitwise-and unit #x07))) + (else + ; illegal + (string-set! s k replacement-character) + (q0 i1 k1)))))) + (define (q1 i k bits) + (if (= i n) + (string-set! s k replacement-character) + (let ((unit (bytevector-u8-ref bv i)) + (i1 (+ i 1)) + (k1 (+ k 1))) + (cond ((< unit #x80) + ; illegal + (string-set! s k replacement-character) + (q0 i k1)) + ((<= unit #xbf) + (string-set! s k (bits->char + (bitwise-ior + (bitwise-arithmetic-shift-left bits 6) + (bitwise-and unit #x3f)))) + (q0 i1 k1)) + (else + ; illegal + (string-set! s k replacement-character) + (q0 i k1)))))) + (define (q2 i k lower bits) + (if (= i n) + (string-set! s k replacement-character) + (let ((unit (bytevector-u8-ref bv i)) + (i1 (+ i 1))) + (cond ((< unit lower) + ; illegal + (string-set! s k replacement-character) + (q0 i (+ k 1))) + ((<= unit #x00bf) + (q1 i1 k (bitwise-ior + (bitwise-arithmetic-shift-left bits 6) + (bitwise-and unit #x3f)))) + (else + ; illegal + (string-set! s k replacement-character) + (q0 i (+ k 1))))))) + (define (q3 i k lower upper bits) + (if (= i n) + (string-set! s k replacement-character) + (let ((unit (bytevector-u8-ref bv i)) + (i1 (+ i 1))) + (cond ((< unit lower) + ; illegal + (string-set! s k replacement-character) + (q0 i (+ k 1))) + ((<= unit upper) + (q2 i1 k #x80 (bitwise-ior + (bitwise-arithmetic-shift-left bits 6) + (bitwise-and unit #x3f)))) + (else + ; illegal + (string-set! s k replacement-character) + (q0 i (+ k 1))))))) + (if begins-with-bom? + (q0 3 0) + (q0 0 0)) + s))) + +; (utf-32-codec) might write a byte order mark, +; so it's better not to use textual i/o for this. + +(define (string->utf16 string . rest) + (let* ((endianness (cond ((null? rest) 'big) + ((not (null? (cdr rest))) + (apply assertion-violation 'string->utf16 + "too many arguments" string rest)) + ((eq? (car rest) 'big) 'big) + ((eq? (car rest) 'little) 'little) + (else (endianness-violation + 'string->utf16 + (car rest))))) + + ; endianness-dependent adjustments to indexing + + (hi (if (eq? 'big endianness) 0 1)) + (lo (- 1 hi)) + + (n (string-length string))) + + (define (result-length) + (do ((i 0 (+ i 1)) + (k 0 (let ((sv (char->integer (string-ref string i)))) + (if (< sv #x10000) (+ k 2) (+ k 4))))) + ((= i n) k))) + + (let ((bv (make-bytevector (result-length)))) + + (define (loop i k) + (if (< i n) + (let ((sv (char->integer (string-ref string i)))) + (if (< sv #x10000) + (let ((hibits (bitwise-bit-field sv 8 16)) + (lobits (bitwise-bit-field sv 0 8))) + (bytevector-u8-set! bv (+ k hi) hibits) + (bytevector-u8-set! bv (+ k lo) lobits) + (loop (+ i 1) (+ k 2))) + (let* ((x (- sv #x10000)) + (hibits (bitwise-bit-field x 10 20)) + (lobits (bitwise-bit-field x 0 10)) + (hi16 (bitwise-ior #xd800 hibits)) + (lo16 (bitwise-ior #xdc00 lobits)) + (hi1 (bitwise-bit-field hi16 8 16)) + (lo1 (bitwise-bit-field hi16 0 8)) + (hi2 (bitwise-bit-field lo16 8 16)) + (lo2 (bitwise-bit-field lo16 0 8))) + (bytevector-u8-set! bv (+ k hi) hi1) + (bytevector-u8-set! bv (+ k lo) lo1) + (bytevector-u8-set! bv (+ k hi 2) hi2) + (bytevector-u8-set! bv (+ k lo 2) lo2) + (loop (+ i 1) (+ k 4))))))) + + (loop 0 0) + bv))) + +(define (utf16->string bytevector . rest) + (let* ((n (bytevector-length bytevector)) + + (begins-with-bom? + (and (null? rest) + (<= 2 n) + (let ((b0 (bytevector-u8-ref bytevector 0)) + (b1 (bytevector-u8-ref bytevector 1))) + (or (and (= b0 #xfe) (= b1 #xff) 'big) + (and (= b0 #xff) (= b1 #xfe) 'little))))) + + (endianness (cond ((null? rest) (or begins-with-bom? 'big)) + ((eq? (car rest) 'big) 'big) + ((eq? (car rest) 'little) 'little) + (else (endianness-violation + 'utf16->string + (car rest))))) + + ; endianness-dependent adjustments to indexing + + (hi (if (eq? 'big endianness) 0 1)) + (lo (- 1 hi)) + + (replacement-character (integer->char #xfffd))) + + ; computes the length of the encoded string + + (define (result-length) + (define (loop i k) + (if (>= i n) + k + (let ((octet (bytevector-u8-ref bytevector i))) + (cond ((< octet #xd8) + (loop (+ i 2) (+ k 1))) + ((< octet #xdc) + (let* ((i2 (+ i 2)) + (octet2 (if (< i2 n) + (bytevector-u8-ref bytevector i2) + 0))) + (if (<= #xdc octet2 #xdf) + (loop (+ i 4) (+ k 1)) + ; bad surrogate pair, becomes replacement character + (loop i2 (+ k 1))))) + (else (loop (+ i 2) (+ k 1))))))) + (if begins-with-bom? + (loop (+ hi 2) 0) + (loop hi 0))) + + (if (odd? n) + (assertion-violation 'utf16->string + "bytevector has odd length" bytevector)) + + (let ((s (make-string (result-length)))) + (define (loop i k) + (if (< i n) + (let ((hibits (bytevector-u8-ref bytevector (+ i hi))) + (lobits (bytevector-u8-ref bytevector (+ i lo)))) + (cond ((< hibits #xd8) + (let ((c (integer->char + (bitwise-ior + (bitwise-arithmetic-shift-left hibits 8) + lobits)))) + (string-set! s k c)) + (loop (+ i 2) (+ k 1))) + ((< hibits #xdc) + (let* ((i2 (+ i hi 2)) + (i3 (+ i lo 2)) + (octet2 (if (< i2 n) + (bytevector-u8-ref bytevector i2) + 0)) + (octet3 (if (< i2 n) + (bytevector-u8-ref bytevector i3) + 0))) + (if (<= #xdc octet2 #xdf) + (let* ((sv (+ #x10000 + (bitwise-arithmetic-shift-left + (bitwise-and + (bitwise-ior + (bitwise-arithmetic-shift-left + hibits 8) + lobits) + #x03ff) + 10) + (bitwise-and + (bitwise-ior + (bitwise-arithmetic-shift-left + octet2 8) + octet3) + #x03ff))) + (c (if (<= #x10000 sv #x10ffff) + (integer->char sv) + replacement-character))) + (string-set! s k c) + (loop (+ i 4) (+ k 1))) + ; bad surrogate pair + (begin (string-set! s k replacement-character) + (loop (+ i 2) (+ k 1)))))) + ((< hibits #xe0) + ; second surrogate not preceded by a first surrogate + (string-set! s k replacement-character) + (loop (+ i 2) (+ k 1))) + (else + (let ((c (integer->char + (bitwise-ior + (bitwise-arithmetic-shift-left hibits 8) + lobits)))) + (string-set! s k c)) + (loop (+ i 2) (+ k 1))))))) + (if begins-with-bom? + (loop 2 0) + (loop 0 0)) + s))) + +; There is no utf-32-codec, so we can't use textual i/o for this. + +(define (string->utf32 string . rest) + (let* ((endianness (cond ((null? rest) 'big) + ((eq? (car rest) 'big) 'big) + ((eq? (car rest) 'little) 'little) + (else (endianness-violation + 'string->utf32 + (car rest))))) + (n (string-length string)) + (result (make-bytevector (* 4 n)))) + (do ((i 0 (+ i 1))) + ((= i n) result) + (bytevector-u32-set! result + (* 4 i) + (char->integer (string-ref string i)) + endianness)))) + +; There is no utf-32-codec, so we can't use textual i/o for this. + +(define (utf32->string bytevector . rest) + (let* ((n (bytevector-length bytevector)) + + (begins-with-bom? + (and (null? rest) + (<= 4 n) + (let ((b0 (bytevector-u8-ref bytevector 0)) + (b1 (bytevector-u8-ref bytevector 1)) + (b2 (bytevector-u8-ref bytevector 2)) + (b3 (bytevector-u8-ref bytevector 3))) + (or (and (= b0 0) (= b1 0) (= b2 #xfe) (= b3 #xff) + 'big) + (and (= b0 #xff) (= b1 #xfe) (= b2 0) (= b3 0) + 'little))))) + + (endianness (cond ((null? rest) (or begins-with-bom? 'big)) + ((eq? (car rest) 'big) 'big) + ((eq? (car rest) 'little) 'little) + (else (endianness-violation + 'string->utf32 + (car rest))))) + + (i0 (if begins-with-bom? 4 0)) + + (result (if (zero? (remainder n 4)) + (make-string (quotient (- n i0) 4)) + (assertion-violation + 'utf32->string + "Bytevector has bad length." bytevector)))) + + (do ((i i0 (+ i 4)) + (j 0 (+ j 1))) + ((= i n) result) + (let* ((sv (bytevector-u32-ref bytevector i endianness)) + (sv (cond ((< sv #xd800) sv) + ((< sv #xe000) #xfffd) ; replacement character + ((< sv #x110000) sv) + (else #xfffd))) ; replacement character + (c (integer->char sv))) + (string-set! result j c))))) + +(define (endianness-violation who what) + (assertion-violation who "bad endianness" what)) + +) addfile ./libs/rnrs/bytevectors/bytevector.scm hunk ./libs/rnrs/bytevectors/bytevector.scm 1 +; Copyright 2006 William D Clinger. +; +; Permission to copy this software, in whole or in part, to use this +; software for any lawful purpose, and to redistribute this software +; is granted subject to the restriction that all copies made of this +; software must include this copyright notice in full. +; +; I also request that you send me a copy of any improvements that you +; make to this software so that they may be incorporated within it to +; the benefit of the Scheme community. +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; (r6rs bytevector) +; +; Bytevectors + +(library (rnrs bytevectors) + + (export endianness native-endianness + bytevector? make-bytevector bytevector-length + bytevector-u8-ref bytevector-s8-ref + bytevector-u8-set! bytevector-s8-set! + bytevector-uint-ref bytevector-sint-ref + bytevector-uint-set! bytevector-sint-set! + bytevector-u16-ref bytevector-s16-ref + bytevector-u16-set! bytevector-s16-set! + bytevector-u16-native-ref bytevector-s16-native-ref + bytevector-u16-native-set! bytevector-s16-native-set! + bytevector-u32-ref bytevector-s32-ref + bytevector-u32-set! bytevector-s32-set! + bytevector-u32-native-ref bytevector-s32-native-ref + bytevector-u32-native-set! bytevector-s32-native-set! + bytevector-u64-ref bytevector-s64-ref + bytevector-u64-set! bytevector-s64-set! + bytevector-u64-native-ref bytevector-s64-native-ref + bytevector-u64-native-set! bytevector-s64-native-set! + bytevector=? + bytevector-ieee-single-native-ref bytevector-ieee-single-ref + bytevector-ieee-double-native-ref bytevector-ieee-double-ref + bytevector-ieee-single-native-set! bytevector-ieee-single-set! + bytevector-ieee-double-native-set! bytevector-ieee-double-set! + bytevector-copy! bytevector-copy + bytevector->u8-list u8-list->bytevector + bytevector->uint-list bytevector->sint-list + uint-list->bytevector sint-list->bytevector + + utf8->string utf16->string utf32->string + string->utf8 string->utf16 string->utf32) + + (import (rnrs base) + (rnrs bytevectors core) (rnrs bytevectors proto) (rnrs bytevectors ieee) (rnrs bytevectors string) + +)) addfile ./libs/srfi/srfi-66.scm hunk ./libs/srfi/srfi-66.scm 1 +(library (srfi :66) + (export u8vector? make-u8vector u8vector u8vector->list list->u8vector u8vector-length u8vector-ref u8vector-set! u8vector=? u8vector-compare u8vector-copy! u8vector-copy) + (import (except (core bytevectors) native-endianness))) + +(library (srfi :66 octet-vectors) + (export u8vector? make-u8vector u8vector u8vector->list list->u8vector u8vector-length u8vector-ref u8vector-set! u8vector=? u8vector-compare u8vector-copy! u8vector-copy) + (import (srfi :66))) + addfile ./srfi-66.scm hunk ./srfi-66.scm 1 +;; Implementation of srfi-66 +;; Since gambit provides most of the functions, only providing what is not allready done. r6gambit expects all of these. There is an extension function (native-endiannes) which should provide the endianness of the platform + +;(u8vector? obj) +;(make-u8vector k fill) +;(u8vector octet ...) +;(u8vector->list u8vector) +;(list->u8vector octets) +;(u8vector-length u8vector) +;(u8vector-ref u8vector k) +;(u8vector-set! u8vector k octet) +;(u8vector-copy u8vector) + +(define (u8vector-compare u8vector-1 u8vector-2) + (cond + ((< (u8vector-length u8vector-1) (u8vector-length u8vector-2)) -1) + ((> (u8vector-length u8vector-1) (u8vector-length u8vector-2)) 1) + (else (let compare ((i 0)) + (cond + ((= i (u8vector-length u8vector-1)) 0) + ((< (u8vector-ref u8vector-1 i) (u8vector-ref u8vector-2 i)) -1) + ((> (u8vector-ref u8vector-1 i) (u8vector-ref u8vector-2 i)) 1) + (else (loop (+ i 1)))))))) + +(define (u8vector=? u8vector-1 u8vector-2) (= (u8vector-compare u8vector-1 u8vector-2) 0)) + +;Perhaps a more native version is more efficient? +(define (u8vector-copy! source source-start target target-start n) + (if (>= source-start target-start) + (do ((i 0 (+ i 1))) + ((>= i count)) + (u8vector-set! target + (+ target-start i) + (u8vector-ref source (+ source-start i)))) + (do ((i (- count 1) (- i 1))) + ((< i 0)) + (u8vector-set! target + (+ target-start i) + (u8vector-ref source (+ source-start i)))))) + + +(define native-endianness + (let ((ne (c-lambda () bool " + #ifdef ___BIG_ENDIAN + ___result=1; + #else + ___result=0; + #endif"))) + + (lambda () (if (ne) 'big 'little)))) + + +