--- blas-1.1.orig/src/crotg.f +++ blas-1.1/src/crotg.f @@ -1,18 +1,18 @@ subroutine crotg(ca,cb,c,s) complex ca,cb,s real c - real norm,scale + real norm,scale,cabs2 complex alpha - if (cabs(ca) .ne. 0.) go to 10 + if (cabs2(ca) .ne. 0.) go to 10 c = 0. s = (1.,0.) ca = cb go to 20 10 continue - scale = cabs(ca) + cabs(cb) - norm = scale * sqrt((cabs(ca/scale))**2 + (cabs(cb/scale))**2) - alpha = ca /cabs(ca) - c = cabs(ca) / norm + scale = cabs2(ca) + cabs2(cb) + norm = scale*sqrt((cabs2(ca/scale))**2 + (cabs2(cb/scale))**2) + alpha = ca /cabs2(ca) + c = cabs2(ca) / norm s = alpha * conjg(cb) / norm ca = alpha * norm 20 continue --- blas-1.1.orig/src/icamax.f +++ blas-1.1/src/icamax.f @@ -9,7 +9,7 @@ real smax integer i,incx,ix,n complex zdum - real cabs1 + real volatile cabs1 cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum)) c icamax = 0 --- blas-1.1.orig/src/izamax.f +++ blas-1.1/src/izamax.f @@ -8,7 +8,7 @@ double complex zx(*) double precision smax integer i,incx,ix,n - double precision dcabs1 + double precision volatile dcabs1 c izamax = 0 if( n.lt.1 .or. incx.le.0 )return --- blas-1.1.orig/src/zrotg.f +++ blas-1.1/src/zrotg.f @@ -1,19 +1,19 @@ subroutine zrotg(ca,cb,c,s) double complex ca,cb,s double precision c - double precision norm,scale + double precision norm,scale,zabs2 double complex alpha - if (cdabs(ca) .ne. 0.0d0) go to 10 + if (zabs2(ca) .ne. 
0.0d0) go to 10 c = 0.0d0 s = (1.0d0,0.0d0) ca = cb go to 20 10 continue - scale = cdabs(ca) + cdabs(cb) - norm = scale*dsqrt((cdabs(ca/dcmplx(scale,0.0d0)))**2 + - * (cdabs(cb/dcmplx(scale,0.0d0)))**2) - alpha = ca /cdabs(ca) - c = cdabs(ca) / norm + scale = zabs2(ca) + zabs2(cb) + norm = scale*dsqrt((zabs2(ca/dcmplx(scale,0.0d0)))**2 + + * (zabs2(cb/dcmplx(scale,0.0d0)))**2) + alpha = ca /zabs2(ca) + c = zabs2(ca) / norm s = alpha * dconjg(cb) / norm ca = alpha * norm 20 continue --- blas-1.1.orig/src/c_abs.f +++ blas-1.1/src/c_abs.f @@ -0,0 +1,8 @@ + real function cabs2(z) + complex z,zz + real t(2) + equivalence (zz,t(1)) + zz = z + cabs2 = sqrt( t(1)*t(1)+t(2)*t(2) ) + return + end --- blas-1.1.orig/src/z_abs.f +++ blas-1.1/src/z_abs.f @@ -0,0 +1,8 @@ + double precision function zabs2(z) + double complex z,zz + double precision t(2) + equivalence (zz,t(1)) + zz = z + zabs2 = sqrt( t(1)*t(1)+t(2)*t(2) ) + return + end --- blas-1.1.orig/doc/faq.html +++ blas-1.1/doc/faq.html @@ -12,7 +12,7 @@

-

+

Many thanks to the @@ -22,7 +22,7 @@

-

+

Table of Contents @@ -42,7 +42,7 @@

-

+

1) BLAS

@@ -278,7 +278,7 @@ for your machine using ATLAS,
http://www.netlib.org/atlas/
.

-

+

lapack@cs.utk.edu
--- blas-1.1.orig/man/manl/cscal.l +++ blas-1.1/man/manl/cscal.l @@ -1,5 +1,6 @@ +.TH CSCAL l "16 October 1992" "BLAS routine" "BLAS routine" .SH NAME - +CSCAL - scales a vector by a constant. .SH SYNOPSIS .TP 31 subroutine cscal(n,ca,cx,incx) --- blas-1.1.orig/man/manl/dcabs1.l +++ blas-1.1/man/manl/dcabs1.l @@ -1,5 +1,6 @@ +.TH DCABS1 l "16 October 1992" "BLAS routine" "BLAS routine" .SH NAME - +DCABS1 - complex double precision absolute value .SH SYNOPSIS .TP 17 double precision --- blas-1.1.orig/man/manl/zrotg.l +++ blas-1.1/man/manl/zrotg.l @@ -1,5 +1,6 @@ +.TH ZROTG l "16 October 1992" "BLAS routine" "BLAS routine" .SH NAME - +ZROTG - construct givens plane rotation .SH SYNOPSIS .TP 28 subroutine zrotg(ca,cb,c,s) --- blas-1.1.orig/man/manl/zscal.l +++ blas-1.1/man/manl/zscal.l @@ -1,5 +1,6 @@ +.TH ZSCAL l "16 October 1992" "BLAS routine" "BLAS routine" .SH NAME - +ZSCAL - scales a vector by a constant. .SH SYNOPSIS .TP 31 subroutine zscal(n,za,zx,incx) --- blas-1.1.orig/cblas/src/cblas.h +++ blas-1.1/cblas/src/cblas.h @@ -2,6 +2,12 @@ #define CBLAS_H #include +/* Allow the use in C++ code. 
*/ +#ifdef __cplusplus +extern "C" +{ +#endif + /* * Enumerated and derived types */ @@ -564,4 +570,9 @@ void *C, const int ldc); void cblas_xerbla(int p, const char *rout, const char *form, ...); + +#ifdef __cplusplus +} +#endif + #endif --- blas-1.1.orig/cblas/testing/c_cblas1.c +++ blas-1.1/cblas/testing/c_cblas1.c @@ -62,12 +62,12 @@ return cblas_icamax(*N, X, *incX); } -float F77_scnrm2(const int *N, const void *X, const int *incX) +double F77_scnrm2(const int *N, const void *X, const int *incX) { return cblas_scnrm2(*N, X, *incX); } -float F77_scasum(const int *N, void *X, const int *incX) +double F77_scasum(const int *N, void *X, const int *incX) { return cblas_scasum(*N, X, *incX); } --- blas-1.1.orig/cblas/testing/c_cblat3.f +++ blas-1.1/cblas/testing/c_cblat3.f @@ -1365,7 +1365,7 @@ * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME - CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, + CALL CPRCN3( NOUT, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, $ M, N, ALPHA, LDA, LDB) * 160 CONTINUE --- blas-1.1.orig/cblas/testing/c_dblat3.f +++ blas-1.1/cblas/testing/c_dblat3.f @@ -1335,7 +1335,7 @@ * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME - CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, + CALL DPRCN3( NOUT, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, $ M, N, ALPHA, LDA, LDB) * 160 CONTINUE --- blas-1.1.orig/cblas/testing/c_sblas1.c +++ blas-1.1/cblas/testing/c_sblas1.c @@ -8,7 +8,7 @@ */ #include "cblas_test.h" #include "cblas.h" -float F77_sasum(const int *N, float *X, const int *incX) +double F77_sasum(const int *N, float *X, const int *incX) { return cblas_sasum(*N, X, *incX); } @@ -20,12 +20,12 @@ return; } -float F77_scasum(const int *N, void *X, const int *incX) +double F77_scasum(const int *N, void *X, const int *incX) { return cblas_scasum(*N, X, *incX); } -float F77_scnrm2(const int *N, const void *X, const int *incX) +double F77_scnrm2(const int *N, const void *X, const int *incX) { return cblas_scnrm2(*N, X, *incX); } @@ 
-37,13 +37,13 @@ return; } -float F77_sdot(const int *N, const float *X, const int *incX, +double F77_sdot(const int *N, const float *X, const int *incX, const float *Y, const int *incY) { return cblas_sdot(*N, X, *incX, Y, *incY); } -float F77_snrm2(const int *N, const float *X, const int *incX) +double F77_snrm2(const int *N, const float *X, const int *incX) { return cblas_snrm2(*N, X, *incX); } --- blas-1.1.orig/cblas/testing/c_sblat3.f +++ blas-1.1/cblas/testing/c_sblat3.f @@ -1339,7 +1339,7 @@ * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME - CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, + CALL SPRCN3( NOUT, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, $ M, N, ALPHA, LDA, LDB) * 160 CONTINUE --- blas-1.1.orig/cblas/testing/c_zblat2.f +++ blas-1.1/cblas/testing/c_zblat2.f @@ -1350,7 +1350,7 @@ * * Call the subroutine. * - IF( SNAME( 4: 5 ).EQ.'mv' )THEN + IF( SNAME( 10: 11 ).EQ.'mv' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, @@ -1376,7 +1376,7 @@ CALL CZTPMV( IORDER, UPLO, TRANS, DIAG, $ N, AA, XX, INCX ) END IF - ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN + ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, @@ -1465,7 +1465,7 @@ END IF * IF( .NOT.NULL )THEN - IF( SNAME( 4: 5 ).EQ.'mv' )THEN + IF( SNAME( 10: 11 ).EQ.'mv' )THEN * * Check the result. * @@ -1473,7 +1473,7 @@ $ INCX, ZERO, Z, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .TRUE. ) - ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN + ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN * * Compute approximation to original vector. * @@ -1611,7 +1611,7 @@ * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK * .. Executable Statements .. - CONJ = SNAME( 5: 5 ).EQ.'c' + CONJ = SNAME( 11: 11 ).EQ.'c' * Define the number of arguments. 
NARGS = 9 * --- blas-1.1.orig/cblas/testing/c_zblat3.f +++ blas-1.1/cblas/testing/c_zblat3.f @@ -1366,7 +1366,7 @@ * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME - CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, + CALL ZPRCN3( NOUT, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, $ M, N, ALPHA, LDA, LDB) * 160 CONTINUE --- blas-1.1.orig/debian/blas-docs.docs +++ blas-1.1/debian/blas-docs.docs @@ -0,0 +1,5 @@ +debian/blas2-paper.ps +debian/blas3-paper.ps +debian/faq.html +debian/cinterface.pdf +debian/blasqr.ps --- blas-1.1.orig/debian/rules +++ blas-1.1/debian/rules @@ -0,0 +1,316 @@ +#!/usr/bin/make -f + +ARCH:=$(shell dpkg --print-architecture) + +# This is the debhelper compatability version to use. +#export DH_COMPAT=2 + +F77:=g77 +GCC:=gcc +F77FLAGS:= -pipe -O3 -fomit-frame-pointer -ff90 + +ifeq ($(ARCH),alpha) +F77FLAGS:=$(F77FLAGS) -mieee +endif + +#ifeq ($(ARCH),mipsel) +#F77:=g77-3.2 +#GCC:=gcc-3.2 +#endif + +#ifeq ($(ARCH),mips) +#F77:=g77-3.2 +#GCC:=gcc-3.2 +#endif + +#ifeq ($(ARCH),sparc) +#F77:=g77-3.3 +#GCC:=gcc-3.3 +#endif + +SO=2 +VERS=$(SO).0 + +VN:=$(shell pwd) +VO:=$(VN).orig +UD:=$(shell dirname $$(pwd)) + +PDF:=cinterface +PDD:=debian/patched-docs +PDP:=$(addprefix $(PDD)/,$(PDF)) +PDS:=$(addsuffix .tex,$(PDP)) +PDO:=$(addsuffix .pdf,$(PDP)) + +$(UD)/blas.tgz: + mkdir -p $(@D) + cd $(@D) && wget -N ftp://ftp.netlib.org/blas/$(@F) + +$(UD)/cblas.tgz: + mkdir -p $(@D) + cd $(@D) && wget -N ftp://ftp.netlib.org/blas/blast-forum/$(@F) + +$(UD)/manpages.tgz: + mkdir -p $(@D) + cd $(@D) && wget -N ftp://ftp.netlib.org/lapack/$(@F) + +$(VO)/src: $(UD)/blas.tgz + mkdir -p $@ + cd $@ && tar zxf $< + +TF:=s d c z +TF:=$(addsuffix blat1,$(TF)) $(addsuffix blat2,$(TF)) $(addsuffix blat3,$(TF)) \ + $(addsuffix blat2d,$(TF)) $(addsuffix blat3d,$(TF)) + +$(addprefix $(VO)/test/,$(TF)): + mkdir -p $(@D) + cd $(@D) && wget -N ftp://ftp.netlib.org/blas/$(@F) + +$(VO)/test: $(addprefix $(VO)/test/,$(TF)) + +$(VO)/man: $(UD)/manpages.tgz + mkdir -p 
$(@D) + cd $(VO) && tar zxf $< blas + rm -rf $@ + mv $(VO)/blas/man $@ + rmdir $(VO)/blas + +$(VO)/cblas: $(UD)/cblas.tgz + cd $(VO) && tar zxf $< + rm -rf $@ + mv $(VO)/CBLAS $@ + +DF:=blas2-paper.ps blas3-paper.ps faq.html blasqr.ps +$(addprefix $(VO)/doc/,$(DF)): + mkdir -p $(@D) + cd $(@D) && wget -N ftp://ftp.netlib.org/blas/$(@F) + +$(VO)/doc/cinterface.pdf: + mkdir -p $(@D) + cd $(@D) && wget -N ftp://ftp.netlib.org/blas/blast-forum/$(@F) + +$(VO)/doc/blue.gif: + mkdir -p $(@D) + cd $(@D) && wget -N ftp://ftp.netlib.org/scalapack/html/gif/$(@F) + +%.png: %.gif + gif2png $< + +$(VO)/doc: $(addprefix $(VO)/doc/,$(DF)) $(VO)/doc/cinterface.pdf $(VO)/doc/blue.png + +ori: $(addprefix $(VO)/,src test man cblas doc) + +BSRC:=$(shell echo src/*.f) +BOBJ:=$(BSRC:.f=.o) + +debian/patch_applied: + ! [ -e debian/$(ARCH)/patch ] || patch -p1 $@ + +cblas/libcblas.a: cblas/Makefile.in debian/patch_applied + cd $(@D) && $(MAKE) cleanobj + cd $(@D) && $(MAKE) rmlib + cd $(@D) && $(MAKE) CC=$(GCC) F77=$(F77) alllib + cp cblas/lib/LINUX/cblas_LINUX.a $@ + +cblas/libcblas_pic.a: cblas/Makefile.in debian/patch_applied + cd $(@D) && $(MAKE) cleanobj + cd $(@D) && $(MAKE) rmlib + cd $(@D) && $(MAKE) CC=$(GCC) F77=$(F77) PICOPTS=-fPIC alllib + cp cblas/lib/LINUX/cblas_LINUX.a $@ + +BIN:=s d c z +BIN:=$(addsuffix cblat1,$(BIN)) $(addsuffix cblat2,$(BIN)) $(addsuffix cblat3,$(BIN)) +BIN:=$(addprefix cblas/testing/x,$(BIN)) + +$(BIN): libblas.so + cd cblas && $(MAKE) alltst CC=$(GCC) F77=$(F77) CBLIB= + +FBN:=s d c z +FBN:=$(addsuffix blat1,$(FBN)) $(addsuffix blat2,$(FBN)) $(addsuffix blat3,$(FBN)) +FBN:=$(addprefix test/x,$(FBN)) + +test/%.f: test/% + cp $< $@ + +test/x%: test/%.f libblas.so + $(F77) $(F77FLAGS) -o $@ $< -L $$(pwd) -lblas + +OUT:=$(subst cblas/testing/,debian/test/,$(BIN)) +OUT1:=$(filter %1,$(OUT)) +OUT23:=$(filter-out %1,$(OUT)) + +$(OUT1): \ + debian/test/%: cblas/testing/% debian/test_dir libblas.so + LD_LIBRARY_PATH=$$(pwd):$$LD_LIBRARY_PATH $< > $@ + awk 
'/fail/ || /FAIL/ {exit 1}' $@ || (cat $@ && false) + +$(OUT23): \ + debian/test/%: cblas/testing/% debian/test_dir libblas.so + LD_LIBRARY_PATH=$$(pwd):$$LD_LIBRARY_PATH $< \ + < cblas/testing/$(subst x,,$(subst cblat,in,$*)) > $@ + awk '/fail/ || /FAIL/ {exit 1}' $@ || (cat $@ && false) + +FOT:=$(subst test/,debian/test/,$(FBN)) +FOT1:=$(filter %1,$(FOT)) +FOT23:=$(filter-out %1,$(FOT)) + +$(FOT1): \ + debian/test/%: test/% debian/test_dir libblas.so + LD_LIBRARY_PATH=$$(pwd):$$LD_LIBRARY_PATH $< > $@ + awk '/fail/ || /FAIL/ {exit 1}' $@ || (cat $@ && false) + +$(FOT23): \ + debian/test/%: test/% debian/test_dir libblas.so + rm -f *.SUMM + LD_LIBRARY_PATH=$$(pwd):$$LD_LIBRARY_PATH $< \ + < test/$(subst x,,$*)d + cat *.SUMM >$@ + awk '/fail/ || /FAIL/ {exit 1}' $@ || (cat $@ && false) + +debian/test_results: $(OUT) $(FOT) + cat $^ > $@ + cat $@ | awk '/fail/ || /FAIL/ {exit 1}' + + +debian/tmp/usr/lib/%: % + dh_testroot + mkdir -p $(@D) + [ -L $* ] || install -m 644 $* $(@D) + ! [ -L $* ] || cp -d $* $(@D) + +debian/tmp/usr/lib/blas/%: cblas/testing/% + dh_testroot + mkdir -p $(@D) + install -m 755 cblas/testing/$* $(@D) + +debian/tmp/usr/lib/blas/%: test/% + dh_testroot + mkdir -p $(@D) + install -m 755 test/$* $(@D) + +debian/tmp/usr/include/%: cblas/src/% + dh_testroot + mkdir -p $(@D) + install -m 644 cblas/src/$* $(@D) + +MFL:=$(shell echo man/manl/*) +%.3: %.l + cat $< |sed "s,^\( *.TH *[^ ]* *\)l\( *.*\),\1 3 \2,1" >$@ +MF:=$(subst .l,.3,$(MFL)) + +IF:=so so.$(SO) so.$(VERS) a +IF:=$(addprefix debian/tmp/usr/lib/libblas.,$(IF)) +IF:=$(IF) $(subst cblas/testing/,debian/tmp/usr/lib/blas/,$(BIN)) +IF:=$(IF) $(subst test/,debian/tmp/usr/lib/blas/,$(FBN)) +IF:=$(IF) $(addprefix debian/tmp/usr/include/cblas,.h _f77.h) $(MF) debian/test_results + +$(PDO): %.pdf: %.tex + cd $(@D) && pdflatex $(&2 'source and diff are obsolete - use dpkg-source -b'; false + +binary: binary-indep binary-arch +.PHONY: build clean binary-indep binary-arch binary \ + $(addprefix 
$(UD)/,blas.tgz cblas.tgz manpages.tgz) +.SUFFIXES: +.INTERMEDIATE: $(VO)/doc/blue.gif --- blas-1.1.orig/debian/a.f77 +++ blas-1.1/debian/a.f77 @@ -0,0 +1,31 @@ +#!/bin/bash + +set -e + +OPTS="" +FILE="" +while [ $# -gt 0 ] ; do + if echo $1 | grep '\.f$' >/dev/null ; then + FILE=$1 + else + OPTS="$OPTS $1" + fi + shift +done + +if [ "$FILE" != "" ] && grep -i abs1 $FILE >/dev/null ; then + + echo f2c $FILE + f2c $FILE + echo cc $OPTS $(echo $FILE | sed 's,\.f,.c,1') + cc $OPTS $(echo $FILE | sed 's,\.f,.c,1') + rm $(echo $FILE | sed 's,\.f,.c,1') + +else + + echo f77 $OPTS $FILE + f77 $OPTS $FILE + +fi + +exit 0 --- blas-1.1.orig/debian/patched-docs/cinterface.tex +++ blas-1.1/debian/patched-docs/cinterface.tex @@ -0,0 +1,1682 @@ +\documentclass{article} + +\begin{document} + +% +% cinterface.tex +% + +%% Chapter Authors: Clint, Sven, Zohair, Linda, Susan + +\section{C interface to the Legacy BLAS}\label{legacy:c} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{legacy:c}, passed 9/0/0, 8/97.\\ +%Second vote taken on section~\ref{legacy:c}, passed 8/0/0, 12/97.\\ +%Third vote taken on section~\ref{legacy:c}, passed 13/0/0, 4/98. \\ +%Fourth vote taken on section~\ref{legacy:c}, passed 8/0/1 with 16 eligible voters, 8/98.} \\ + +This section gives a detailed discussion of the proposed C interface to the +legacy BLAS. Every mention of ``BLAS'' in this chapter should be taken to mean +the legacy BLAS. Each interface decision is discussed in its own section. +Each section also contains a {\em Considered methods} subsection, where +other solutions to that particular problem are discussed, along with +the reasons why those options were not chosen. These {\em Considered methods} +subsections are indented and {\it italicized} in order to distinguish them +from the rest of the text. 
+ +It is largely agreed among the group (and unanimous among the vendors) +that user demand for a C interface to the BLAS is insufficient to motivate +vendors to support a completely separate standard. This proposal therefore +confines itself to an interface +which can be readily supported on top of the already existing +Fortran 77 callable BLAS (i.e., the legacy BLAS). + +The interface is expressed in terms of ANSI/ISO C. Very few platforms fail +to provide ANSI/ISO C compilers at this time, and for those platforms, free +ANSI/ISO C compilers are almost always available (eg., {\tt gcc}). + +\subsection{Naming scheme}\label{legacy:c_namingscheme} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{legacy:c_namingscheme}, passed 9/0/0, 8/97.\\ +%Second vote taken on section~\ref{legacy:c_namingscheme}, passed 8/0/0, 12/97.\\ +%Third vote taken on section~\ref{legacy:c_namingscheme}, passed 14/0/0, 4/98. \\ +%Fourth vote taken on section~\ref{legacy:c_namingscheme}, passed 8/0/1 with 16 eligible voters, 8/98.} \\ + +The naming scheme consists of taking the Fortran 77 routine name, making it +lower case, and adding the prefix {\tt cblas\_}. Therefore, the routine +{\tt DGEMM} becomes {\tt cblas\_dgemm}. + +\subsubsection{Considered methods} +{\it +\begin{quotation} +Various other naming schemes have been proposed, such as adding {\tt C\_} +or {\tt c\_} to the name. Most of these schemes accomplish the requirement +of separating the Fortran 77 and C name spaces. It was argued, however, that +the addition of the {\tt blas} prefix unifies the naming scheme in a logical +and useful way (making it easy to search for BLAS use in a code, for instance), +while not placing too great a burden on the typist. The letter {\tt c} is used +to distinguish this language interface from possible future interfaces. 
+\end{quotation} +} + +\subsection{Indices and I\_AMAX} \label{sec-Indices} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{sec-Indices}, passed 9/0/0, 8/97.\\ +%Second vote taken on section~\ref{sec-Indices}, passed 8/0/0, 12/97.\\ +%Third vote taken on section~\ref{sec-Indices}, passed 14/0/0, 4/98.\\ +%A binding vote for section~\ref{sec-Indices} was taken to see if we +%should allow N=0, passed 8/2/4, 4/98. \\ +%Fourth vote taken on section~\ref{sec-Indices}, passed 8/0/1 with 16 eligible voters, 8/98.}\\ + +The Fortran 77 BLAS return indices in the range $1 \leq I \leq N$ (where $N$ +is the number of entries in the dimension in question, and $I$ is the index), +in accordance with Fortran 77 array indexing conventions. This allows functions +returning indices to be directly used to index standard arrays. The C interface +therefore returns indices in the range $0 \leq I < N$ for the same reason. + +The only BLAS routine which returns an index is the function {\tt I\_AMAX}. +This function is declared to be of type {\tt CBLAS\_INDEX}, which is guaranteed +to be an integer type (i.e., no cast is required when assigning to any integer +type). {\tt CBLAS\_INDEX} will usually correspond to {\tt size\_t} to ensure +any array can be indexed, but implementors might choose the integer type which +matches their Fortran 77 {\tt INTEGER}, for instance. It is defined that zero +is returned as the index for a zero length vector (eg., For $N=0$, +{\tt I\_AMAX} will always return zero). + + +\subsection{Character arguments} +\label{sec-EnumType} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{sec-EnumType}, passed 9/0/0, 8/97.\\ +%Second vote taken on section~\ref{sec-EnumType}, passed 8/0/0, 12/97.\\ +%Third vote taken on section~\ref{sec-EnumType}, passed 14/0/0, 4/98. 
\\ +%Fourth vote taken on section~\ref{sec-EnumType}, passed 8/0/1 with 16 eligible voters, 8/98.} \\ + +All arguments which were characters in the Fortran 77 interface are handled by +enumerated types in the C interface. This allows for tighter error checking, +and provides less opportunity for user error. The character arguments present +in the Fortran 77 interface are: {\tt SIDE}, {\tt UPLO}, {\tt TRANSPOSE}, and +{\tt DIAG}. This interface adds another such argument to all routines involving +two dimensional arrays, {\tt ORDER}. The standard dictates the following +enumerated types: + +\begin{verbatim} +enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; +enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; +enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; +enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; +enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; +\end{verbatim} + +\subsubsection{Considered methods} +{\it +\begin{quotation} +The other two most commonly suggested methods were accepting these arguments as +either {\tt char~*} or {\tt char}. It was noted that both of these options +require twice as many comparisons as normally required to branch (so that the +character may be either upper or lower case). Both methods also suffered from +ambiguity (what does it mean to have {\tt DIAG='H'}, for instance). +If {\tt char} was chosen, the words could not be written out as they can for the +Fortran 77 interface (you couldn't write "NoTranspose"). If {\tt char~*} were +used, some compilers might fail to optimize string constant use, causing +unnecessary memory usage. + +The main advantage of enumerated data types, however, is that much of the error +checking can be done at compile time, rather than at runtime (i.e., if the +user fails to pass one of the valid options, the compiler can issue the error). 
+ +There was much discussion as to whether the integer values should be specified, +or whether only the enumerated names should be so specified. The group could +find no substantive way in which specifying the integer values would restrict +an implementor, and specifying the integer values was seen as an aid to +inter-language calls. +\end{quotation} +} + +\subsection{Handling of complex data types}\label{legacy:c_handlingcomplex} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{legacy:c_handlingcomplex}, passed 9/0/0, 8/97.\\ +%Second vote taken on section~\ref{legacy:c_handlingcomplex}, passed 7/0/1, 12/97.\\ +%Third vote taken on section~\ref{legacy:c_handlingcomplex}, passed 14/0/0, 4/98. \\ +%Fourth vote taken on section~\ref{legacy:c_handlingcomplex}, passed 8/0/1 with 16 eligible voters, 8/98.} \\ + +All complex arguments are accepted as {\tt void *}. A complex element consists +of two consecutive memory locations of the underlying data type +(i.e., {\tt float} or {\tt double}), where the first location contains the +real component, and the second contains the imaginary part of the number. + +In practice, programmers' methods of handling complex types in C vary. +Some use various data structures (some examples are discussed below). +Others accept complex numbers as arrays of the underlying type. + +Complex numbers are accepted as void pointers so that widespread type casting +will not be required to avoid warnings or errors during compilation of +complex code. + +An ANSI/ISO committee is presently working on an extension to ANSI/ISO C +which defines complex data types. The definition of a complex element +is the same as given above, and so the handling of complex types by this +interface will not need to be changed when the ANSI/ISO C standard is +extended. 
+ +\subsubsection{Considered methods} +{\it +\begin{quotation} +Probably the most strongly advocated alternative was defining complex numbers +via a structure such as \\ +{\tt struct NON\_PORTABLE\_COMPLEX~\{float~r;~float~i;\};} +The main problem with this solution is the lack of portability. By the +ANSI/ISO C standard, elements in a structure are not guaranteed to be +contiguous. +With the above structure, padding between elements has been experimentally +observed (on the CRAY T3D), so this problem is not purely theoretical. + +To get around padding problems within the structure, a structure +such as \\ +{\tt struct NON\_PORTABLE\_COMPLEX~\{float~v[2];\};} +has been suggested. With this +structure there will obviously be no padding +between the real and imaginary parts. However, there still exists the +possibility of padding between elements within an array. More importantly, this +structure does not lend itself nearly as well as the first to code clarity. + +A final proposal is to define a structure which may be addressed the same +as the one above (i.e., \verb+ptr->r+, \verb+ptr->i+), but whose actual +definition is platform dependent. Then, hopefully, various vendors will +either use the above structure and ensure via their compilers its +contiguousness, or they will create a different structure which can be +accessed in the same way. + +This requires vendors to support something which is not in the ANSI C standard, +and so there is no way to ensure this would take place. More to the point, +use of such a structure turns out to not offer much in the way of real +advantage, as discussed in the following section. + +All of these approaches require the programmer to either use the specified +data type throughout the code which will call the BLAS, or to perform type +casting on each BLAS call. 
When complex numbers are accepted as void pointers, +no type casting or data type is dictated, with the only restriction being +that a complex number have the definition given above. +\end{quotation} +} + +\subsection{Return values of complex functions}\label{legacy:c_returncomplex} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{legacy:c_returncomplex}, passed 9/0/0, 8/97.\\ +%Second vote taken on section~\ref{legacy:c_returncomplex}, passed 7/0/1, 12/97.\\ +%Third vote taken on section~\ref{legacy:c_returncomplex}, passed 13/0/1, 4/98. \\ +%Fourth vote taken on section~\ref{legacy:c_returncomplex}, passed 8/0/1 with 16 eligible voters, 8/98.} \\ + +BLAS routines which return complex values in Fortran 77 are instead recast as +subroutines in the C interface, with the return value being an output parameter +added to the end of the argument list. This allows the output parameter to +be accepted as void pointers, as discussed above. + +Further, the name is suffixed by {\tt \_sub}. There are two main reasons +for this name change. First, the change from a function to a subroutine +is a significant change, and thus the name should reflect this. More +importantly, the ``traditional'' name space is specifically reserved +for use when the forthcoming ANSI/ISO C extension is finalized. When +this is done, this C interface will be extended to include functions using +the ``traditional'' names which utilize the new ANSI/ISO complex type to +return the values. + +\subsubsection{Considered methods} +{\it +\begin{quotation} +This is the area where use of a structure is most desired. Again, the most +common suggestion is a structure such as +\verb+struct NON_PORTABLE_COMPLEX {float r; float i;};+. + +If one is willing to use this structure throughout one's code, then this +provides a natural and convenient mechanism. If, however, the programmer has +utilized a different structure for complex, this ease of use breaks down. 
Then, +something like the following code fragment is required: +\begin{verbatim} + NON_PORTABLE_COMPLEX ctmp; + float cdot[2]; + + ctmp = cblas_cdotc(n, x, 1, y, 1); + cdot[0] = ctmp.r; + cdot[1] = ctmp.i; +\end{verbatim} +which is certainly much less convenient than: +\verb+cblas_cdotc_sub(n, x, 1, y, 1, cdot)+. + +It should also be noted that the primary reason for having a function instead +of a subroutine is already invalidated by C's lack of a standard complex type. +Functions are most useful when the result may be used directly as part of +an in-line computation. However, since ANSI/ISO C lacks support for +complex arithmetic primitives or operator overloading, complex functions cannot +be standardly used in this way. Since the function cannot be used as a part +of a larger expression, nothing is lost by recasting it as a subroutine; +indeed a slight performance win may be obtained. +\end{quotation} +} + +\subsection{Array arguments}\label{legacy:c_arrayarguments} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{legacy:c_arrayarguments}, passed 9/0/0, 8/97.\\ +%Second vote taken on section~\ref{legacy:c_arrayarguments}, passed 8/0/0, 12/97.\\ +%Third vote taken on section~\ref{legacy:c_arrayarguments}, passed 13/0/1, 4/98. \\ +%Fourth vote taken on section~\ref{legacy:c_arrayarguments}, passed 8/0/1 with 16 eligible voters, 8/98.} \\ + +Arrays are constrained to being contiguous in memory. They +are accepted as pointers, not as arrays of pointers. +%This means that +%the C definition of a two dimensional array may not be used directly, since each +%row is an arbitrary pointer (i.e., the address of the second row cannot +%be obtained from the address of the first row). Note that if the user +%somehow ensures the C array is actually contiguous (eg. by allocating +%it himself), C two dimensional arrays can indeed be used. 
+ +All BLAS routines which take one or more two dimensional arrays as arguments +receive one extra parameter as their first argument. This argument is +of the enumerated type \\ +{\tt enum~CBLAS\_ORDER~\{CblasRowMajor=101,~CblasColMajor=102\};}.\\ +If this parameter +is set to {\tt CblasRowMajor}, it is assumed that elements within a row of +the array(s) are contiguous in memory, while elements within array columns +are separated by a constant stride given in the {\tt stride} parameter (this +parameter corresponds to the leading dimension [e.g. {\tt LDA}] in the +Fortran 77 interface). + +If the order is given as {\tt CblasColMajor}, elements within array columns +are assumed to be contiguous, with elements within array rows separated +by {\tt stride} memory elements. + +Note that there is only one {\tt CBLAS\_ORDER} parameter +to a given routine: all array operands are required to use the same ordering. + +\subsubsection{Considered methods} +{\it +\begin{quotation} +This solution comes after much discussion. C users appear to split roughly +into two camps. Those people who have a history +of mixing C and Fortran 77 (in particular making use of the Fortran 77 BLAS +from C), tend to use column-major arrays in order to allow ease of +inter-language operations. Because of the flexibility of pointers, this +is not appreciably harder than using row-major arrays, even though C +``natively'' possesses row-major arrays. + +The second camp of C users +are not interested in overt C/Fortran 77 interoperability, and wish to +have arrays which are row-major, in accordance with standard C conventions. +The idea that they must recast their row-oriented algorithms to column-major +algorithms is unacceptable; many in this camp would probably not utilize +any BLAS which enforced a column-major constraint. + +Because both camps are fairly widely represented within the target +audience, it is impossible to choose one solution to the exclusion of +the other. 
+ +Column-major array storage can obviously be supported directly on top of +the legacy Fortran 77 BLAS. Recent work, particularly code provided +by D.P. Manley of DEC, has shown that row-major array storage may also +be supported in this way with little cost. Appendix~\ref{app-ArrayStore} +discusses this issue in detail. To preview it here, we can say the level +1 and 3 BLAS require no extra operations or storage to support row-major +operations on top of the legacy BLAS. Level 2 real routines also require +no extra operations or storage. Some complex level 2 routines involving +the conjugate transpose will require extra storage and operations in order +to form explicit conjugates. However, this will always involve vectors, +not the matrix. In the worst case, we will need $n$ extra storage, and +$3n$ sign changes. + +One proposal was to accept arrays as arrays of pointers, instead of as +a single pointer. +%This would correspond exactly to the standard ANSI/ISO C +%two dimensional array. +The problems with this approach are manifold. First, +the existing Fortran 77 BLAS could not be used, since they demand contiguous +(though strided) storage. Second, this approach requires users of standard +C 2D arrays or 1D arrays to allocate and assign the appropriate pointer array. + +Beyond this, many of the vectors used in level 1 and level 2 BLAS come +from rows or columns of two dimensional arrays. Elements within columns of +row-major arrays are not uniformly strided, which means that a {\tt n}-element +column vector would need {\tt n} pointers to represent it. This then +leads to vectors being accepted as arrays of pointers as well. + +Now, assuming both our one and two dimensional arrays are accepted as arrays +of pointers, +we have a problem when we wish to perform sub-array access. 
If we wish to +pass an $m \times n$ subsection of this array of pointers, starting at row $i$ +and column $j$, +we must allocate $m$ pointers, and assign them in a section of code such as: + +\begin{verbatim} + float **A, **subA; + + subA = malloc(m*sizeof(float*)); + for (k=0; k != m; k++) subA[k] = A[i+k] + j; + cblas_rout(... subA ...); +\end{verbatim} + +The same operation must be done if we wish to use a row or column as a vector. +This is not only an inconvenience, but can add up to a non-negligible +performance loss as well. + +A fix for these problems is that one and two dimensional arrays be passed +as arrays of +pointers, and then indices are passed in to indicate the sub-portion to +access. Thus you have a call that looks like: +\verb|cblas_rout(... A, i, j, ...);|. +This solution still requires some additional tweaks to allow using two +dimensional array rows and columns as vectors. Users presently using +C 2D arrays or 1D arrays would have to malloc the array of pointers as shown +in the preceding example in order to use this kind of interface. At any +rate, a library accepting pointers to pointers cannot be supported on top +of the Fortran 77 BLAS, while one supporting simple pointers can. + +If the programmer is utilizing the pointer to pointer style of array indexing, +it is still possible to use this library providing that the user ensures that +the operand matrix is contiguous, and that the rows are constantly +strided. If this is the case, the user may pass the operand matrix +to the library in precisely the same way as with a 2D C array: +\verb|cblas_rout(... &A[i][j] ...);|. + +\noindent +{\bf Example 1: making a library call with a C 2D array:} +\begin{verbatim} + double A[50][25]; /* standard C 2D array */ + + cblas_rout(CblasRowMajor, ...
&A[i][j], 25, ...); +\end{verbatim} + +\noindent +{\bf Example 2: Legal use of pointer to pointer style programming and the CBLAS} +\begin{verbatim} + double **A, *p; + + A = malloc(M*sizeof(double *)); + p = malloc(M*N*sizeof(double)); + for (i=0; i < M; i++) A[i] = &p[i*N]; + + cblas_rout(CblasRowMajor, ... &A[i][j], N, ...); +\end{verbatim} + +\noindent +{\bf Example 3: Illegal use of pointer to pointer style programming and the CBLAS} +\begin{verbatim} + double **A, *p; + + A = malloc(M*sizeof(double *)); + p = malloc(M*N*sizeof(double)); + for (i=0; i < M; i++) A[i] = malloc(N*sizeof(double)); + + cblas_rout(CblasRowMajor, ... &A[i][j], N, ...); +\end{verbatim} + + +Note that Example 3 is illegal because the rows of A have no guaranteed stride. +\end{quotation} +} + +\subsection{Aliasing of arguments}\label{legacy:c_aliasing} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{legacy:c_aliasing}, failed, too restrictive, +%back to subcommittee, 4/98.\\ +%Second vote taken on section~\ref{legacy:c_aliasing}, passed 13/0/0, 4/98.\\ +%Third vote taken on section~\ref{legacy:c_aliasing}, passed 8/0/1 with 16 eligible voters, 8/98.}\\ + +Unless specified otherwise, only input-only arguments (specified with the +{\tt const} qualifier), may be legally aliased on a call to the C interface +to the BLAS. + +\subsubsection{Considered methods} +The ANSI C standard allows for the aliasing of output arguments. However, +allowing this often carries a substantial performance penalty. This, along +with the fact that Fortran 77 (which we hope to call for optimized libraries) +does not allow aliasing of output arguments, led us to make this restriction. 
+ +\subsection{C interface include file}\label{legacy:c_includefile} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{legacy:c_includefile}, passed 14/0/0, 4/98.\\ +%Second vote taken on section~\ref{legacy:c_includefile}, passed 13/0/0, 4/98.\\ +%Third vote taken on section~\ref{legacy:c_includefile}, passed 8/0/1 with 16 eligible voters, 8/98.}\\ + +The C interface to the BLAS will have a standard include file, called +{\tt cblas.h}, which minimally contains the definition of the CBLAS types +and ANSI/ISO C prototypes for all BLAS routines. +It is not an error to include this file multiple times. +Section~\ref{sec-cblash} contains an example of a minimal {\tt cblas.h}. + +{{\bf ADVICE TO THE IMPLEMENTOR:}\\ \em +Note that the vendor is not constrained to using precisely +this include file; only the enumerated type definitions are fully specified. +The implementor is free to make any other changes which are not apparent +to the user. For instance, all matrix dimensions might be accepted as +{\tt size\_t} instead of {\tt int}, or the implementor might choose to +make some routines inline. +} + +\subsection{Error checking}\label{legacy:c_errorchecking} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{legacy:c_errorchecking}, passed 9/0/0, 8/97.\\ +%Second vote taken on section~\ref{legacy:c_errorchecking}, passed 8/0/0, 12/97.\\ +%Third vote taken on section~\ref{legacy:c_errorchecking}, passed 13/0/0, 4/98.\\ +%Fourth vote taken on section~\ref{legacy:c_errorchecking}, passed 8/0/1 with 16 eligible voters, 8/98.}\\ + +The C interface to the legacy BLAS must supply error checking corresponding +to that provided by the reference Fortran 77 BLAS implementation. + +%{\it {\bf ADVICE TO IMPLEMENTOR:} +%If the Fortran 77 code is used to implement the C interface, most of the error +%checking may be done by Fortran 77 code, assuming error reporting is +%changed to reflect the differing C interface. 
+%} + +\subsection{Rules for obtaining the C interface from the Fortran 77} +\label{legacy:c_rules} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{legacy:c_rules}, passed 9/0/1 with 16 eligible voters, 8/98.}\\ + +\begin{itemize} + \item The Fortran 77 routine name is changed to lower case, and prefixed by {\tt cblas\_}. + \item All routines which accept two dimensional arrays (i.e., level 2 and 3), + acquire a new parameter of type {\tt CBLAS\_ORDER} as their first + argument, which determines if the two dimensional arrays are row or + column major. + \item {\em Character arguments} are replaced by the appropriate enumerated type, + as shown in Section~\ref{sec-EnumType}. + \item {\em Input arguments} are declared with the {\tt const} modifier. + \item {\em Non-complex scalar input arguments} are passed by value. This + allows the user to put in constants when desired (e.g., passing 10 on + the command line for \verb+N+). + \item {\em Complex scalar input arguments} are passed as void pointers, + since they do not exist as a predefined data type in ANSI/ISO C. + \item {\em Array arguments} are passed by address. + \item {\em Output scalar arguments} are passed by address. + \item {\em Complex functions} become subroutines which return the result via + a void pointer, added as the last parameter. The name is suffixed + with {\tt \_sub}.
+\end{itemize} + +\subsection{cblas.h include file} +\label{sec-cblash} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{sec-cblash}, passed 9/0/1 with 16 eligible voters, 8/98.}\\ + +\begin{verbatim} +#ifndef CBLAS_H +#define CBLAS_H +#include <stddef.h> + +/* + * Enumerated and derived types + */ +#define CBLAS_INDEX size_t /* this may vary between platforms */ +enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; +enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; +enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; +enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; +enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; + +/* + * =========================================================================== + * Prototypes for level 1 BLAS functions (complex are recast as routines) + * =========================================================================== + */ +float cblas_sdsdot(const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY); +double cblas_dsdot(const int N, const float *X, const int incX, const float *Y, + const int incY); +float cblas_sdot(const int N, const float *X, const int incX, + const float *Y, const int incY); +double cblas_ddot(const int N, const double *X, const int incX, + const double *Y, const int incY); + +/* + * Functions having prefixes Z and C only + */ +void cblas_cdotu_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotu); +void cblas_cdotc_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotc); + +void cblas_zdotu_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotu); +void cblas_zdotc_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotc); + + +/* + * Functions having prefixes S D SC DZ + */ +float cblas_snrm2(const int N, const float *X, const int incX); +float cblas_sasum(const int N, const
float *X, const int incX); + +double cblas_dnrm2(const int N, const double *X, const int incX); +double cblas_dasum(const int N, const double *X, const int incX); + +float cblas_scnrm2(const int N, const void *X, const int incX); +float cblas_scasum(const int N, const void *X, const int incX); + +double cblas_dznrm2(const int N, const void *X, const int incX); +double cblas_dzasum(const int N, const void *X, const int incX); + + +/* + * Functions having standard 4 prefixes (S D C Z) + */ +CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX); +CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX); +CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX); +CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX); + +/* + * =========================================================================== + * Prototypes for level 1 BLAS routines + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (s, d, c, z) + */ +void cblas_sswap(const int N, float *X, const int incX, + float *Y, const int incY); +void cblas_scopy(const int N, const float *X, const int incX, + float *Y, const int incY); +void cblas_saxpy(const int N, const float alpha, const float *X, + const int incX, float *Y, const int incY); + +void cblas_dswap(const int N, double *X, const int incX, + double *Y, const int incY); +void cblas_dcopy(const int N, const double *X, const int incX, + double *Y, const int incY); +void cblas_daxpy(const int N, const double alpha, const double *X, + const int incX, double *Y, const int incY); + +void cblas_cswap(const int N, void *X, const int incX, + void *Y, const int incY); +void cblas_ccopy(const int N, const void *X, const int incX, + void *Y, const int incY); +void cblas_caxpy(const int N, const void *alpha, const void *X, + const int incX, void *Y, const int incY); + +void cblas_zswap(const int N, void *X, const int incX, + void *Y, 
const int incY); +void cblas_zcopy(const int N, const void *X, const int incX, + void *Y, const int incY); +void cblas_zaxpy(const int N, const void *alpha, const void *X, + const int incX, void *Y, const int incY); + + +/* + * Routines with S and D prefix only + */ +void cblas_srotg(float *a, float *b, float *c, float *s); +void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P); +void cblas_srot(const int N, float *X, const int incX, + float *Y, const int incY, const float c, const float s); +void cblas_srotm(const int N, float *X, const int incX, + float *Y, const int incY, const float *P); + +void cblas_drotg(double *a, double *b, double *c, double *s); +void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P); +void cblas_drot(const int N, double *X, const int incX, + double *Y, const int incY, const double c, const double s); +void cblas_drotm(const int N, double *X, const int incX, + double *Y, const int incY, const double *P); + + +/* + * Routines with S D C Z CS and ZD prefixes + */ +void cblas_sscal(const int N, const float alpha, float *X, const int incX); +void cblas_dscal(const int N, const double alpha, double *X, const int incX); +void cblas_cscal(const int N, const void *alpha, void *X, const int incX); +void cblas_zscal(const int N, const void *alpha, void *X, const int incX); +void cblas_csscal(const int N, const float alpha, void *X, const int incX); +void cblas_zdscal(const int N, const double alpha, void *X, const int incX); + +/* + * =========================================================================== + * Prototypes for level 2 BLAS + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (S, D, C, Z) + */ +void cblas_sgemv(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *X, const int incX, const float beta, + 
float *Y, const int incY); +void cblas_sgbmv(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const float alpha, + const float *A, const int lda, const float *X, + const int incX, const float beta, float *Y, const int incY); +void cblas_strmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *A, const int lda, + float *X, const int incX); +void cblas_stbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const float *A, const int lda, + float *X, const int incX); +void cblas_stpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *Ap, float *X, const int incX); +void cblas_strsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *A, const int lda, float *X, + const int incX); +void cblas_stbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const float *A, const int lda, + float *X, const int incX); +void cblas_stpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *Ap, float *X, const int incX); + +void cblas_dgemv(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const double alpha, const double *A, const int lda, + const double *X, const int incX, const double beta, + double *Y, const int incY); +void cblas_dgbmv(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const double 
alpha, + const double *A, const int lda, const double *X, + const int incX, const double beta, double *Y, const int incY); +void cblas_dtrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *A, const int lda, + double *X, const int incX); +void cblas_dtbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const double *A, const int lda, + double *X, const int incX); +void cblas_dtpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *Ap, double *X, const int incX); +void cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *A, const int lda, double *X, + const int incX); +void cblas_dtbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const double *A, const int lda, + double *X, const int incX); +void cblas_dtpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *Ap, double *X, const int incX); + +void cblas_cgemv(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *X, const int incX, const void *beta, + void *Y, const int incY); +void cblas_cgbmv(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const void *alpha, + const void *A, const int lda, const void *X, + const int incX, const void *beta, void *Y, const int incY); +void cblas_ctrmv(const enum CBLAS_ORDER order, const enum 
CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, + void *X, const int incX); +void cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); +void cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, void *X, + const int incX); +void cblas_ctbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); + +void cblas_zgemv(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *X, const int incX, const void *beta, + void *Y, const int incY); +void cblas_zgbmv(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const void *alpha, + const void *A, const int lda, const void *X, + const int incX, const void *beta, void *Y, const int incY); +void cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, + void *X, const int incX); +void cblas_ztbmv(const enum CBLAS_ORDER order, const 
enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); +void cblas_ztrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, void *X, + const int incX); +void cblas_ztbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ztpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); + + +/* + * Routines with S and D prefixes only + */ +void cblas_ssymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *A, + const int lda, const float *X, const int incX, + const float beta, float *Y, const int incY); +void cblas_ssbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const float alpha, const float *A, + const int lda, const float *X, const int incX, + const float beta, float *Y, const int incY); +void cblas_sspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *Ap, + const float *X, const int incX, + const float beta, float *Y, const int incY); +void cblas_sger(const enum CBLAS_ORDER order, const int M, const int N, + const float alpha, const float *X, const int incX, + const float *Y, const int incY, float *A, const int lda); +void cblas_ssyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const 
int N, const float alpha, const float *X, + const int incX, float *A, const int lda); +void cblas_sspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, float *Ap); +void cblas_ssyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY, float *A, + const int lda); +void cblas_sspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY, float *A); + +void cblas_dsymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *A, + const int lda, const double *X, const int incX, + const double beta, double *Y, const int incY); +void cblas_dsbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const double alpha, const double *A, + const int lda, const double *X, const int incX, + const double beta, double *Y, const int incY); +void cblas_dspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *Ap, + const double *X, const int incX, + const double beta, double *Y, const int incY); +void cblas_dger(const enum CBLAS_ORDER order, const int M, const int N, + const double alpha, const double *X, const int incX, + const double *Y, const int incY, double *A, const int lda); +void cblas_dsyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, double *A, const int lda); +void cblas_dspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, double *Ap); +void cblas_dsyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, const double *Y, 
const int incY, double *A, + const int lda); +void cblas_dspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, const double *Y, const int incY, double *A); + + +/* + * Routines with C and Z prefixes only + */ +void cblas_chemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_chbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_chpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *Ap, + const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_cgeru(const enum CBLAS_ORDER order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_cgerc(const enum CBLAS_ORDER order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_cher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const void *X, const int incX, + void *A, const int lda); +void cblas_chpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float *alpha, const void *X, + const int incX, void *A); +void cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_chpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int 
incY, void *Ap); + +void cblas_zhemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_zhbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_zhpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *Ap, + const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_zgeru(const enum CBLAS_ORDER order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_zgerc(const enum CBLAS_ORDER order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const void *X, const int incX, + void *A, const int lda); +void cblas_zhpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double *alpha, const void *X, + const int incX, void *A); +void cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *Ap); + +/* + * =========================================================================== + * Prototypes for level 3 BLAS + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (S, 
D, C, Z) + */ +void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const float alpha, const float *A, + const int lda, const float *B, const int ldb, + const float beta, float *C, const int ldc); +void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *B, const int ldb, const float beta, + float *C, const int ldc); +void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const float *A, const int lda, + const float beta, float *C, const int ldc); +void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const float *A, const int lda, + const float *B, const int ldb, const float beta, + float *C, const int ldc); +void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const float alpha, const float *A, const int lda, + float *B, const int ldb); +void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const float alpha, const float *A, const int lda, + float *B, const int ldb); + +void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const double alpha, const double *A, + const int lda, const double *B, const int ldb, + const double beta, double *C, const int ldc); +void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, 
+ const enum CBLAS_UPLO Uplo, const int M, const int N, + const double alpha, const double *A, const int lda, + const double *B, const int ldb, const double beta, + double *C, const int ldc); +void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const double *A, const int lda, + const double beta, double *C, const int ldc); +void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const double *A, const int lda, + const double *B, const int ldb, const double beta, + double *C, const int ldc); +void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const double alpha, const double *A, const int lda, + double *B, const int ldb); +void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const double alpha, const double *A, const int lda, + double *B, const int ldb); + +void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const void *alpha, const void *A, + const int lda, const void *B, const int ldb, + const void *beta, void *C, const int ldc); +void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int 
lda, + const void *beta, void *C, const int ldc); +void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); +void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); + +void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const void *alpha, const void *A, + const int lda, const void *B, const int ldb, + const void *beta, void *C, const int ldc); +void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *beta, void *C, const int ldc); +void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + 
const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); +void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); + + +/* + * Routines with prefixes C and Z only + */ +void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const void *A, const int lda, + const float beta, void *C, const int ldc); +void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const float beta, + void *C, const int ldc); + +void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const void *A, const int lda, + const double beta, void *C, const int ldc); +void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const 
int ldb, const double beta, + void *C, const int ldc); + +#endif +\end{verbatim} + +\subsection{Using Fortran 77 BLAS to support row-major BLAS operations} +\label{app-ArrayStore} + +%{\footnotesize {\bf Current Status:}\\ +%First vote taken on section~\ref{app-ArrayStore}, passed 9/0/1 with 16 eligible voters, 8/98.}\\ + +This section is not part of the standard per se. Rather, it exists as +advice to the implementor on how row-major BLAS operations may be +implemented using column-major BLAS. This allows vendors to leverage +years of Fortran 77 BLAS development in producing the C BLAS. + +Before this issue is examined in detail, a few general observations on array +storage are helpful. We must distinguish between the matrix and the array +which is used to store the matrix. The matrix, and its rows and columns, +have mathematical meaning. The array is simply the method of storing the +matrix, and its rows and columns are significant only for memory addressing. + +Thus we see we can store the columns of a matrix in the rows of an array, +for instance. When this occurs in the BLAS, the matrix is said to be +stored in transposed form. + +A row-major array stores elements along a row in contiguous storage, and +separates the column elements by some constant stride (often the actual +length of a row). Column-major arrays have contiguous columns, and strided +rows. The importance of this is to note that a row-major array storing +a matrix in the natural way, is a transposed column-major array (i.e., +it can be thought of as a column-major array where the rows of the matrix +are stored in the columns of the array). + +Similarly, an upper triangular row-major array corresponds to a transposed +lower triangular column-major array (the same is true in reverse [i.e., +lower-to-upper], obviously). To see this, simply think of what an upper +triangular matrix stored in a row-major array looks like. 
The first $n$ +entries contain the first matrix row, followed by a non-negative gap, +followed by the second matrix row. + +If this same array is viewed as column-major, the first $n$ entries are a +column, instead of a row, so that the columns of the array store the +rows of the matrix (i.e., it is transposed). This means that if we wish +to use the Fortran 77 (column-major) BLAS with triangular matrices coming +from C (possibly row-major), we will be reversing the setting of {\tt UPLO}, +while simultaneously reversing the setting of {\tt TRANS} (this gets slightly +more complicated when the conjugate transpose is involved, as we will see). + +Finally, note that if a matrix is symmetric or Hermitian, its rows are the +same as its columns, so we may merely switch {\tt UPLO}, without bothering with +{\tt TRANS}. + +In the BLAS, there are two separate cases of importance. One-dimensional +arrays (storage for vectors) have the same meaning in both C and Fortran 77, +so if we are +solving a linear algebra problem whose answer is a vector, we will need to +solve the same problem for both languages. However, if the answer is a +matrix, in terms of calling routines which use column-major storage from +one using row-major storage, we will want to solve the {\em transpose} +of the problem. + +To get an idea of what this means, consider a contrived example. Say we +have routines for simple matrix-matrix and matrix-vector multiply. The vector +operation is $y \leftarrow A \times x$, and the matrix operation is +$C \leftarrow A \times B$. Now say we are implementing these as calls +from row-major array storage to column-major storage. Since the matrix-vector +multiply's answer is a vector, the problem we are solving remains the same, +but we must remember that our C array $A$ is a Fortran 77 $A^T$. 
+On the other hand, the matrix-matrix multiply has a matrix +for a result, so when the differing array storage is taken into account, +the problem we want to solve is $C^T \leftarrow B^T \times A^T$. + +This last example demonstrates another general result. Some level 3 BLAS +contain a {\tt SIDE} parameter, determining which side a matrix is applied +on. In general, if we are solving the transpose of this operation, the side +parameter will be reversed. + +With these general principles, it is possible to show that all +row-major level 3 BLAS can be expressed in terms of column-major BLAS without +any extra array storage or extra operations. In the level 2 BLAS, no +extra storage or array accesses are required for the real routines. Complex +routines involving the conjugate transpose, however, may require an +$n$-element temporary, and up to $3n$ more operations (vendors may avoid all +extra workspace and operations +by overloading the {\tt TRANS} option for the level 2 BLAS: letting it also +allow conjugation without doing the transpose). +The level 1 BLAS, which deal exclusively with vectors, are unaffected by +this storage issue. + +With these ideas in mind, we will now show how to support a row-major BLAS +on top of a column major BLAS. +This information will be presented in tabular form. +For brevity, row-major storage will be referred to as coming from C (even +though column-major arrays can also come from C), while +column-major storage will be referred to as F77. + +Each table will show a BLAS invocation coming from C, the operation that the +BLAS should perform, the operation required once F77 storage is taken +into account (if this changes), and the call to the appropriate F77 BLAS. +Not every possible +combination of parameters is shown, since many are simply reflections of +another (i.e., when we are applying the {\tt Upper, NoTranspose} becomes +{\tt Lower, Transpose} rule, we will show it for only the upper case). 
+In order to make the notation more concise, let us define $\overline{x}$ +to be $conj(x)$. + +\subsubsection{Level 2 BLAS} +\subsubsection{GEMV} + +\noindent +{\small +\begin{tabular}{ll} +C call & {\tt cblas\_cgemv(CblasRowMajor, CblasNoTrans, m, n, $\alpha$, A, lda, x, incx, $\beta$, y, incy)}\\ +op & $y \leftarrow \alpha A x + \beta y$\\ +F77 call & {\tt CGEMV('T', n, m, $\alpha$, A, lda, x, incx, $\beta$, y, incy)}\\\\ +% +C call & {\tt cblas\_cgemv(CblasRowMajor, CblasTrans, m, n, $\alpha$, A, lda, x, incx, $\beta$, y, incy)}\\ +op & $y \leftarrow \alpha A^T x + \beta y$\\ +F77 call & {\tt CGEMV('N', n, m, $\alpha$, A, lda, x, incx, $\beta$, y, incy)}\\\\ +% +C call & {\tt cblas\_cgemv(CblasRowMajor, CblasConjTrans, m, n, $\alpha$, A, lda, x, incx, $\beta$, y, incy)}\\ +op & $y \leftarrow \alpha A^H x + \beta y \Rightarrow + \overline{(\overline{y} \leftarrow \overline{\alpha} A^T \overline{x} + + \overline{\beta} \overline{y})}$\\ +F77 call & {\tt CGEMV('N', n, m, $\overline{\alpha}$, A, lda, $\overline{x}$, 1, $\overline{\beta}$, $\overline{y}$, incy)}\\\\ +\end{tabular} +} + +Note that we switch the value of transpose to handle the row/column major +ordering difference. +In the last case, we will require $n$ elements of workspace so that +we may store the conjugated vector $\overline{x}$. Then, we set $y = \overline{y}$, and +make the call. This gives +us the conjugate of the answer, so we once again set $y = \overline{y}$. Therefore, we +see that to support the conjugate transpose, we will need to allocate an $n$-element +vector, and perform $2m+n$ extra operations. + +\subsubsection{SYMV} + +SYMV requires no extra workspace or operations. 
+ +{\small +\begin{tabular}{ll} +C call & {\tt cblas\_csymv(CblasRowMajor, CblasUpper, n, $\alpha$, A, lda, x, incx, $\beta$, y, incy)}\\ +op & $y \leftarrow \alpha A x + \beta y \Rightarrow + y \leftarrow \alpha A^T x + \beta y$\\ +F77 call & {\tt CSYMV('L', n, $\alpha$, A, lda, x, incx, $\beta$, y, incy)}\\\\ +% +%C call & {\tt cblas\_csymv(CblasRowMajor, CblasLower, n, $\alpha$, A, lda, x, incx, $\beta$, y, incy)}\\ +%op & $y \leftarrow \alpha A x + \beta y$\\ +%F77 call & {\tt CSYMV('U', n, $\alpha$, A, lda, x, incx, $\beta$, y, incy)}\\ +\end{tabular} +} + +\subsubsection{HEMV} +HEMV routine requires $3n$ conjugations, and $n$ extra storage. + +{\small +\begin{tabular}{ll} +C call & {\tt cblas\_chemv(CblasRowMajor, CblasUpper, n, $\alpha$, A, lda, x, incx, $\beta$, y, incy)}\\ +op & $y \leftarrow \alpha A x + \beta y \Rightarrow + y \leftarrow \alpha A^H x + \beta y \Rightarrow + \overline{(\overline{y} \leftarrow \overline{\alpha} A^T \overline{x} + + \overline{\beta} \overline{y})}$\\ +F77 call & {\tt CHEMV('L', n, $\overline{\alpha}$, A, lda, $\overline{x}$, incx, $\overline{\beta}$, $\overline{y}$, incy)}\\\\ +% +%C call & {\tt cblas\_chemv(CblasRowMajor, CblasLower, n, $\alpha$, A, lda, x, incx, $\beta$, y, incy)}\\ +%op & $y \leftarrow \alpha A x + \beta y$\\ +%F77 call & {\tt CHEMV('U', n, $\alpha$, A, lda, x, incx, $\beta$, y, incy)}\\ +\end{tabular} +} + +\subsubsection{TRMV/TRSV} + +\noindent +{ \small +\begin{tabular}{ll} +C call & {\tt cblas\_ctrmv(CblasRowMajor, CblasUpper, CblasNoTrans, diag, n, A, lda, x, incx)}\\ +op & $x \leftarrow A x$\\ +F77 call & {\tt CTRMV('L', 'T', diag, n, A, lda, x, incx)}\\\\ +% +C call & {\tt cblas\_ctrmv(CblasRowMajor, CblasUpper, CblasTrans, diag, n, A, lda, x, incx)}\\ +op & $x \leftarrow A^T x$\\ +F77 call & {\tt CTRMV('L', 'N', diag, n, A, lda, x, incx)}\\\\ +% +C call & {\tt cblas\_ctrmv(CblasRowMajor, CblasUpper, CblasConjTrans, diag, n, A, lda, x, incx)}\\ +op & $x \leftarrow A^H x \Rightarrow 
\overline{(\overline{x} = A^T \overline{x})}$\\ +F77 call & {\tt CTRMV('L', 'N', diag, n, A, lda, $\overline{x}$, incx)}\\\\ +\end{tabular} +} + +Again, we see that we will need some extra operations when we are handling the +conjugate transpose. We conjugate $x$ before the call, giving us the conjugate +of the answer we seek. We then conjugate this again to return the correct answer. +This routine therefore needs $2n$ extra operations for the complex conjugate case. + +The calls with the C array being {\tt Lower} are merely the reflection of these +calls, and thus are not shown. The analysis for TRSV is the same, since it +involves the same principle of what a transpose of a triangular matrix is. + +\subsubsection{GER/GERU} + +This is our first routine that has a matrix as the solution. Recalling that +this means we solve the transpose of the original problem, we get: + +{ \small +\noindent +\begin{tabular}{ll} +C call & {\tt cblas\_cgeru(CblasRowMajor, m, n, $\alpha$, x, incx, y, incy, A, lda)}\\ +C op & $A \leftarrow \alpha x y^T + A$ \\ +F77 op & $A^T \leftarrow \alpha y x^T +A^T$ \\ +F77 call & {\tt CGERU(n, m, $\alpha$, y, incy, x, incx, A, lda)}\\\\ +\end{tabular} +} + +No extra storage or operations are required. + +\subsubsection{GERC} +{ \small +\begin{tabular}{ll} +C call & {\tt cblas\_cgerc(CblasRowMajor, m, n, $\alpha$, x, incx, y, incy, A, lda)}\\ +C op & $A \leftarrow \alpha x y^H + A$ \\ +F77 op & $A^T \leftarrow \alpha (x y^H)^T + A^T = \alpha \overline{y} x^T + A^T$ \\ +F77 call & {\tt CGERU(n, m, $\alpha$, $\overline{y}$, incy, x, incx, A, lda)}\\\\ +\end{tabular} +} + +Note that we need to allocate $n$-element workspace to hold +the conjugated $y$, and we call {\tt GERU}, not {\tt GERC}. 
+ +\subsubsection{HER} +{ \small +\begin{tabular}{ll} +C call & {\tt cblas\_cher(CblasRowMajor, CblasUpper, n, $\alpha$, x, incx, A, lda)}\\ +C op & $A \leftarrow \alpha x x^H + A$ \\ +F77 op & $A^T \leftarrow \alpha \overline{x} x^T + A^T$ \\ +F77 call & {\tt CHER('L', n, $\alpha$, $\overline{x}$, 1, A, lda)}\\\\ +\end{tabular} +} + +Again, we have an $n$-element workspace and $n$ extra operations. + +\subsubsection{HER2} +{ \small +\begin{tabular}{ll} +C call & {\tt cblas\_cher2(CblasRowMajor, CblasUpper, n, $\alpha$, x, incx, y, incy, A, lda)}\\ +C op & $A \leftarrow \alpha x y^H + y (\alpha x)^H + A$ \\ +F77 op & $A^T \leftarrow \alpha \overline{y} x^T + \overline{\alpha} \overline{x} y^T + A^T = + \alpha \overline{y} (\overline{x})^H + \overline{x} (\alpha \overline{y})^H + A^T$ \\ +F77 call & {\tt CHER2('L', n, $\alpha$, $\overline{y}$, 1, $\overline{x}$, 1, A, lda)}\\\\ +\end{tabular} +} + +So we need $2n$ extra workspace and operations to form the conjugates of $x$ +and $y$. + +\subsubsection{SYR} +{\small +\begin{tabular}{ll} +C call & {\tt cblas\_ssyr(CblasRowMajor, CblasUpper, n, $\alpha$, x, incx, A, lda)}\\ +C op & $A \leftarrow \alpha x x^T + A$ \\ +F77 op & $A^T \leftarrow \alpha x x^T + A^T$ \\ +F77 call & {\tt SSYR('L', n, $\alpha$, x, incx, A, lda)}\\\\ +\end{tabular} +} + +No extra storage or operations required. + +\subsubsection{SYR2} +{\small +\begin{tabular}{ll} +C call & {\tt cblas\_ssyr2(CblasRowMajor, CblasUpper, n, $\alpha$, x, incx, y, incy, A, lda)}\\ +C op & $A \leftarrow \alpha x y^T + \alpha y x^T + A$ \\ +F77 op & $A^T \leftarrow \alpha y x^T + \alpha x y^T + A^T$ \\ +F77 call & {\tt SSYR2('L', n, $\alpha$, y, incy, x, incx, A, lda)}\\\\ +\end{tabular} +} + +No extra storage or operations required. 
+ +\subsubsection{Level 3 BLAS} + +\subsubsection{GEMM} +{\footnotesize +\begin{tabular}{ll} +C call & {\tt cblas\_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A B + \beta C$\\ +F77 op & $C^T \leftarrow \alpha B^T A^T + \beta C^T$\\ +F77 call & {\tt CGEMM('N', 'N', n, m, k, $\alpha$, B, ldb, A, lda, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_cgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A B^T + \beta C$\\ +F77 op & $C^T \leftarrow \alpha B A^T + \beta C^T$\\ +F77 call & {\tt CGEMM('T', 'N', n, m, k, $\alpha$, B, ldb, A, lda, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_cgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans, m, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A B^H + \beta C$\\ +F77 op & $C^T \leftarrow \alpha \overline{B} A^T + \beta C^T$\\ +F77 call & {\tt CGEMM('C', 'N', n, m, k, $\alpha$, B, ldb, A, lda, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_cgemm(CblasRowMajor, CblasTrans, CblasNoTrans, m, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A^T B + \beta C$\\ +F77 op & $C^T \leftarrow \alpha B^T A + \beta C^T$\\ +F77 call & {\tt CGEMM('N', 'T', n, m, k, $\alpha$, B, ldb, A, lda, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_cgemm(CblasRowMajor, CblasTrans, CblasTrans, m, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A^T B^T + \beta C$\\ +F77 op & $C^T \leftarrow \alpha B A + \beta C^T$\\ +F77 call & {\tt CGEMM('T', 'T', n, m, k, $\alpha$, B, ldb, A, lda, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_cgemm(CblasRowMajor, CblasTrans, CblasConjTrans, m, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A^T B^H + \beta C$\\ +F77 op & $C^T \leftarrow \alpha \overline{B} A + \beta C^T$\\ +F77 call & {\tt CGEMM('C', 'T', n, m, k, $\alpha$, B, 
ldb, A, lda, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_cgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans, m, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A^H B + \beta C$\\ +F77 op & $C^T \leftarrow \alpha B^T \overline{A} + \beta C^T$\\ +F77 call & {\tt CGEMM('N', 'C', n, m, k, $\alpha$, B, ldb, A, lda, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_cgemm(CblasRowMajor, CblasConjTrans, CblasTrans, m, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A^H B^T + \beta C$\\ +F77 op & $C^T \leftarrow \alpha B \overline{A} + \beta C^T$\\ +F77 call & {\tt CGEMM('T', 'C', n, m, k, $\alpha$, B, ldb, A, lda, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_cgemm(CblasRowMajor, CblasConjTrans, CblasConjTrans, m, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A^H B^H + \beta C$\\ +F77 op & $C^T \leftarrow \alpha \overline{B} \overline{A} + \beta C^T$\\ +F77 call & {\tt CGEMM('C', 'C', n, m, k, $\alpha$, B, ldb, A, lda, $\beta$, C, ldc)}\\\\ +\end{tabular} +} + +\subsubsection{SYMM/HEMM} +{\small +\begin{tabular}{ll} +C call & {\tt cblas\_chemm(CblasRowMajor, CblasLeft, CblasUpper, m, n, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A B + \beta C$\\ +F77 op & $C^T \leftarrow \alpha B^T A^T + \beta C^T$\\ +F77 call & {\tt CHEMM('R', 'L', n, m, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_chemm(CblasRowMajor, CblasRight, CblasUpper, m, n, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha B A + \beta C$\\ +F77 op & $C^T \leftarrow \alpha A^T B^T + \beta C^T$\\ +F77 call & {\tt CHEMM('L', 'L', n, m, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\\\ +\end{tabular} +} + +\subsubsection{SYRK} +{\small +\begin{tabular}{ll} +C call & {\tt cblas\_csyrk(CblasRowMajor, CblasUpper, CblasNoTrans, n, k, $\alpha$, A, lda, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A A^T + \beta C$\\ +F77 op & $C^T 
\leftarrow \alpha A A^T + \beta C^T$\\ +F77 call & {\tt CSYRK('L', 'T', n, k, $\alpha$, A, lda, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_csyrk(CblasRowMajor, CblasUpper, CblasTrans, n, k, $\alpha$, A, lda, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A^T A + \beta C$\\ +F77 op & $C^T \leftarrow \alpha A^T A + \beta C^T$\\ +F77 call & {\tt CSYRK('L', 'N', n, k, $\alpha$, A, lda, $\beta$, C, ldc)}\\\\ +\end{tabular} +} + +In reading the above descriptions, it is important to remember a few things. +First, the symmetric matrix is $C$, and thus we change {\tt UPLO} to accommodate +the differing storage of $C$. {\tt TRANSPOSE} is then varied to handle the +storage effects on $A$. + +\subsubsection{HERK} +{\small +\begin{tabular}{ll} +C call & {\tt cblas\_cherk(CblasRowMajor, CblasUpper, CblasNoTrans, n, k, $\alpha$, A, lda, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A A^H + \beta C$\\ +F77 op & $C^T \leftarrow \alpha \overline{A} A^T + \beta C^T$\\ +F77 call & {\tt CHERK('L', 'C', n, k, $\alpha$, A, lda, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_cherk(CblasRowMajor, CblasUpper, CblasConjTrans, n, k, $\alpha$, A, lda, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A^H A + \beta C$\\ +F77 op & $C^T \leftarrow \alpha A^T \overline{A} + \beta C^T$\\ +F77 call & {\tt CHERK('L', 'N', n, k, $\alpha$, A, lda, $\beta$, C, ldc)}\\\\ +\end{tabular} +} + +\subsubsection{SYR2K} +{ \footnotesize +\begin{tabular}{ll} +C call & {\tt cblas\_csyr2k(CblasRowMajor, CblasUpper, CblasNoTrans, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A B^T + \alpha B A^T + \beta C$\\ +F77 op & $C^T \leftarrow \alpha B A^T + \alpha A B^T + \beta C^T = + \alpha A B^T + \alpha B A^T + \beta C^T$\\ +F77 call & {\tt CSYR2K('L', 'T', n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_csyr2k(CblasRowMajor, CblasUpper, CblasTrans, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C 
\leftarrow \alpha A^T B + \alpha B^T A + \beta C$\\ +F77 op & $C^T \leftarrow \alpha B^T A + \alpha A^T B + \beta C^T = + \alpha A^T B + \alpha B^T A + \beta C^T$\\ +F77 call & {\tt CSYR2K('L', 'N', n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\\\ +\end{tabular} +} + +Note that we once again wind up with an operation that looks the same from C and +Fortran 77, except that the C operation wishes to form $C^T$, instead of $C$. +So once again we flip the setting of {\tt UPLO} to handle the difference in the +storage of $C$. We then flip the setting of {\tt TRANS} to handle the storage +effects for $A$ and $B$. + +\subsubsection{HER2K} +{\footnotesize +\begin{tabular}{ll} +C call & {\tt cblas\_cher2k(CblasRowMajor, CblasUpper, CblasNoTrans, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A B^H + \overline{\alpha} B A^H + \beta C$\\ +F77 op & $C^T \leftarrow \alpha \overline{B} A^T + \overline{\alpha} \overline{A} B^T + \beta C^T = + \overline{\alpha} \overline{A} B^T + \alpha \overline{B} A^T + \beta C^T$\\ +F77 call & {\tt CHER2K('L', 'C', n, k, $\overline{\alpha}$, A, lda, B, ldb, $\beta$, C, ldc)}\\\\ +% +C call & {\tt cblas\_cher2k(CblasRowMajor, CblasUpper, CblasConjTrans, n, k, $\alpha$, A, lda, B, ldb, $\beta$, C, ldc)}\\ +C op & $C \leftarrow \alpha A^H B + \overline{\alpha} B^H A + \beta C$\\ +F77 op & $C^T \leftarrow \alpha B^T \overline{A} + \overline{\alpha} A^T \overline{B} + \beta C^T = + \overline{\alpha} A^T \overline{B} + \alpha B^T \overline{A} + \beta C^T$\\ +F77 call & {\tt CHER2K('L', 'N', n, k, $\overline{\alpha}$, A, lda, B, ldb, $\beta$, C, ldc)}\\\\ +\end{tabular} +} + +\subsubsection{TRMM/TRSM} + +Because of their identical use of the {\tt SIDE}, {\tt UPLO}, and {\tt TRANSA} +parameters, TRMM and TRSM share the same general analysis. +Remember that A is a triangular matrix, and thus when we handle its storage by +flipping {\tt UPLO}, we implicitly change its {\tt TRANS} setting as well. 
+With this in mind, we have: + +{\footnotesize +\noindent +\begin{tabular}{ll} +C call & {\tt cblas\_ctrmm(CblasRowMajor, CblasLeft, CblasUpper, CblasNoTrans, diag, m, n, $\alpha$, A, lda, B, ldb)}\\ +C op & $B \leftarrow \alpha A B$\\ +F77 op & $B^T \leftarrow \alpha B^T A^T$\\ +F77 call & {\tt CTRMM('R', 'L', 'N', diag, n, m, $\alpha$, A, lda, B, ldb)}\\\\ +% +C call & {\tt cblas\_ctrmm(CblasRowMajor, CblasLeft, CblasUpper, CblasTrans, diag, m, n, $\alpha$, A, lda, B, ldb)}\\ +C op & $B \leftarrow \alpha A^T B$\\ +F77 op & $B^T \leftarrow \alpha B^T A$\\ +F77 call & {\tt CTRMM('R', 'L', 'T', diag, n, m, $\alpha$, A, lda, B, ldb)}\\\\ +% +C call & {\tt cblas\_ctrmm(CblasRowMajor, CblasLeft, CblasUpper, CblasConjTrans, diag, m, n, $\alpha$, A, lda, B, ldb)}\\ +C op & $B \leftarrow \alpha A^H B$\\ +F77 op & $B^T \leftarrow \alpha B^T \overline{A}$\\ +F77 call & {\tt CTRMM('R', 'L', 'C', diag, n, m, $\alpha$, A, lda, B, ldb)}\\\\ +\end{tabular} +} + +\subsubsection{Banded routines}\label{cint:banded} +The above techniques can be used for the banded routines only if a C (row-major) +banded array has some sort of meaning when expanded as a Fortran banded +array. It turns out that when this is done, you get the transpose of +the C array, just as in the dense case. + +In Fortran 77, the banded array is an array whose rows correspond to +the diagonals of the matrix, and whose columns contain the selected portion +of the matrix column. To rephrase this, the diagonals of the matrix are +stored in strided storage, and the relevant pieces of the columns of the +matrix are stored in contiguous memory. This makes sense: in a column-based +algorithm, you will want your columns to be contiguous for efficiency +reasons. + +In order to ensure our columns are contiguous, we will structure the banded +array as shown below. Notice that the first $K_U$ rows +of the array store the superdiagonals, appropriately spaced to line up +correctly in the column direction with the main diagonal. 
The last $K_L$ +rows contain the subdiagonals. + +{\samepage +\begin{verbatim} + ------ Super diagonal KU + ----------- Super diagonal 2 + ------------ Super diagonal 1 +------------- main diagonal (D) +------------ Sub diagonal 1 +----------- Sub diagonal 2 +------ Sub diagonal KL +\end{verbatim} +} + +If we have a row-major storage, and thus a row-oriented algorithm, we will +similarly want our rows to be contiguous in order to ensure efficiency. +The storage scheme that is thus dictated is shown below. Notice +that the first $K_L$ columns store the subdiagonals, appropriately padded +to line up with the main diagonal along rows. + +{\samepage +\begin{verbatim} + KL D KU + | | | | + | | | | | + | | | | | | +| | | | | | +| | | | | +| | | | +\end{verbatim} +} + +Now, let us contrast these two storage schemes. Both store +the diagonals of the matrix along the non-contiguous dimension of the matrix. +The column-major banded array stores the matrix columns along the contiguous +dimension, whereas the row-major banded array stores the matrix rows along the +contiguous storage. + +This gives us our first hint as to what to do: rows stored where columns +should be, indicated, in the dense routines, that we needed to set a +transpose parameter. We will see that we can do this for the banded routines +as well. + +We can further note that in the column-major banded array, the first part of the +non-contiguous dimension (i.e. the first rows) store superdiagonals, whereas +the first part of the non-contiguous dimension of row-major arrays (i.e., the +first columns) store the subdiagonals. + +We now note that when you transpose a matrix, the superdiagonals of the matrix +become the subdiagonals of the matrix transpose (and vice versa). + +Along the contiguous dimension, we note that we skip $K_U$ elements before +coming to our first entry in a column-major banded array. The same happens +in our row-major banded array, except that the skipping factor is $K_L$. 
+ +All this leads to the idea that when we have a row-major banded array, we can +consider it as a transpose of the Fortran 77 column-major banded array, where +we will swap not only $m$ and $n$, but also $K_U$ and $K_L$. An example should +help demonstrate this principle. Let us say we have the matrix +$ +A = \left [ +\begin{array}{rrrr} +1 & 3 & 5 & 7\\ +2 & 4 & 6 & 8 +\end{array} +\right ] +$ + +If we express this entire array in banded form (a fairly dumb thing to do, +but good for example purposes), we get +$K_U = 3$, $K_L = 1$. In row-major banded storage this becomes: +$ +C_b = \left [ +\begin{array}{rrrrr} +X & 1 & 3 & 5 & 7\\ +2 & 4 & 6 & 8 & X +\end{array} +\right ] $ + +So, we believe this should be the transpose if interpreted as a Fortran 77 +banded array. The matrix transpose, and its Fortran 77 banded storage is shown +below: + +$A^T = \left [ +\begin{array}{rr} +1 & 2\\ +3 & 4\\ +5 & 6\\ +7 & 8 +\end{array} +\right ] \Rightarrow +F_b = \left [ +\begin{array}{rr} +X & 2\\ +1 & 4\\ +3 & 6\\ +5 & 8\\ +7 & X +\end{array} +\right ]$ + +Now we simply note that since $C_b$ is row major, and $F_b$ is column-major, +they are actually the same array in memory. + +With the idea that row-major banded matrices produce the transpose of the matrix +when interpreted as column-major banded matrices, we can use the same analysis +for the banded BLAS as we used for the dense BLAS, noting that we must also +always swap $K_U$ and $K_L$. + +\subsubsection{Packed routines} +Packed routines are much simpler than banded. Here we have a triangular, +symmetric or Hermitian matrix which is packed so that only the relevant triangle +is stored. Thus if we have an upper triangular matrix stored in column-major +packed storage, the first element holds the relevant portion of the first column +of the matrix, the next two elements hold the relevant portion of the second +column, etc. 
+ +With an upper triangular matrix stored in row-major packed storage, the first +$N$ elements hold the first row of the matrix, the next $N-1$ elements hold +the next row, etc. + +Thus we see in the Hermitian and symmetric cases, to get a row-major packed +array correctly interpreted by Fortran 77, we will simply switch the setting +of {\tt UPLO}. This will mean that the rows of the matrix will be read in as the +columns, but this is not a problem, as we have seen before. In the symmetric case, +since $A = A^T$ the column and rows are the same, so there is obviously no +problem. In the Hermitian case, we must be sure that the imaginary component +of the diagonal is not used, and is assumed to be zero. However, the diagonal +element in a row when our matrix is upper will correspond to the diagonal +element in a column when our matrix is called lower, so this is handled as well. + +In the triangular cases, we will need to change both {\tt UPLO} and {\tt TRANS}, +just as in the dense routines. + +With these ideas in mind, the analysis for the dense routines may be used +unchanged for packed. +\clearpage +\end{document} --- blas-1.1.orig/debian/patched-docs/cinterface.tex.diff +++ blas-1.1/debian/patched-docs/cinterface.tex.diff @@ -0,0 +1,57 @@ +cd /fix/t1/camm/blas-report/ +diff -c /fix/t1/camm/blas-report/cinterface_new.tex /fix/t1/camm/blas-report/cinterface.tex +*** /fix/t1/camm/blas-report/cinterface_new.tex Tue Jan 13 11:14:10 2004 +--- /fix/t1/camm/blas-report/cinterface.tex Thu Oct 19 09:53:02 2000 +*************** +*** 1,7 **** +- \documentclass{article} +- +- \begin{document} +- + % + % cinterface.tex + % +--- 1,3 ---- +*************** +*** 396,402 **** + \begin{verbatim} + double **A, *p; + +! A = malloc(M*sizeof(double *)); + p = malloc(M*N*sizeof(double)); + for (i=0; i < M; i++) A[i] = &p[i*N]; + +--- 392,398 ---- + \begin{verbatim} + double **A, *p; + +! 
A = malloc(M); + p = malloc(M*N*sizeof(double)); + for (i=0; i < M; i++) A[i] = &p[i*N]; + +*************** +*** 408,414 **** + \begin{verbatim} + double **A, *p; + +! A = malloc(M*sizeof(double *)); + p = malloc(M*N*sizeof(double)); + for (i=0; i < M; i++) A[i] = malloc(N*sizeof(double)); + +--- 404,410 ---- + \begin{verbatim} + double **A, *p; + +! A = malloc(M); + p = malloc(M*N*sizeof(double)); + for (i=0; i < M; i++) A[i] = malloc(N*sizeof(double)); + +*************** +*** 1679,1682 **** + With these ideas in mind, the analysis for the dense routines may be used + unchanged for packed. + \clearpage +- \end{document} +--- 1675,1677 ---- + +Diff finished at Tue Jan 13 11:31:13 --- blas-1.1.orig/debian/control +++ blas-1.1/debian/control @@ -0,0 +1,76 @@ +Source: blas +Priority: optional +Maintainer: Camm Maguire +Standards-Version: 3.6.1 +Build-Depends: g77 , debhelper ( >= 4.1.0 ), tetex-bin +Section: libs + +Package: blas +Architecture: any +Provides: blas2 +Conflicts: blas1 +Replaces: blas1 +Depends: ${shlibs:Depends} +Description: Basic Linear Algebra Subroutines, shared library + BLAS (Basic Linear Algebra Subroutines) is a set of efficient + routines for most of the basic vector and matrix operations. + They are widely used as the basis for other high quality linear + algebra software, for example lapack and linpack. This + implementation is the Fortran 77 reference implementation found + at netlib. + . + This package contains a shared version of the library. + +Package: blas-dev +Architecture: any +Provides: blas2-dev +Section: devel +Conflicts: atlas2-dev (<= 3.0-8), atlas2-headers (<< 3.2.1ln-1) +Replaces: atlas2-dev (<= 3.0-8), atlas2-headers (<< 3.2.1ln-1) +Depends: blas (= ${Source-Version}), libc6-dev, ${g77} +Description: Basic Linear Algebra Subroutines, static library + BLAS (Basic Linear Algebra Subroutines) is a set of efficient + routines for most of the basic vector and matrix operations. 
+ They are widely used as the basis for other high quality linear + algebra software, for example lapack and linpack. This + implementation is the Fortran 77 reference implementation found + at netlib. + . + This package contains a static version of the library. + +Package: blas-test +Architecture: any +Depends: ${shlibs:Depends} +Description: Basic Linear Algebra Subroutines, testing programs + BLAS (Basic Linear Algebra Subroutines) is a set of efficient + routines for most of the basic vector and matrix operations. + They are widely used as the basis for other high quality linear + algebra software, for example lapack and linpack. This + implementation is the Fortran 77 reference implementation found + at netlib. + . + This package contains a set of programs which test the integrity + of an installed blas-compatible shared library. These programs + may therefore be used to test the libraries provided by the + blas package as well as those provided by the atlas packages. + The programs are dynamically linked -- one can explicitly select + a library to test by setting the LD_LIBRARY_PATH or LD_PRELOAD + environment variables. Likewise, one can display the library + selected using the ldd program in an identical environment. + +Package: blas-doc +Architecture: all +Section: doc +Depends: +Replaces: refblas3-doc +Description: Basic Linear Algebra Subroutines, documentation + BLAS (Basic Linear Algebra Subroutines) is a set of efficient + routines for most of the basic vector and matrix operations. + They are widely used as the basis for other high quality linear + algebra software, for example lapack and linpack. This + implementation is the Fortran 77 reference implementation found + at netlib. + . 
+ This package contains manual pages for the routines, and other + supporting documentation + --- blas-1.1.orig/debian/shlibs.local +++ blas-1.1/debian/shlibs.local @@ -0,0 +1 @@ +libblas 2 atlas2-base | blas | blas2 --- blas-1.1.orig/debian/changelog +++ blas-1.1/debian/changelog @@ -0,0 +1,222 @@ +blas (1.1-14) unstable; urgency=high + + * Bug fix: "blas-doc: Cannot be installed together with refblas3-doc", + thanks to Erik Schnetter (Closes: #292695). blas-doc Replaces: with + refblas3-doc. + * Bug fix: "Why does blas-dev depend on g77?", thanks to Philipp + Frauenfelder (Closes: #262029). Replace g77 dependency in blas-dev + with libg2c0-dev, which is needed to resolve certain symbols when + linking against blas. cf. bug #113086. + * Standards 3.6.1 + * Fix section in .TH header of man pages. + * Fix whatis entry in dcabs1.l + * Rename development alternatives libblas2.so -> libblas-2.so, as with + refblas3-dev, to avoid ldconfig precedence. + + -- Camm Maguire Tue, 22 Feb 2005 22:41:44 +0000 + +blas (1.1-13) unstable; urgency=low + + * Bug fix: "blas-doc: mistake in cinterface.pdf", thanks to Jochen Voss + (Closes: #227346). Corrected mistake in source to cinterface.pdf + found in the blas-report.tgz file on upstream website, recompiled, and + modified rules to overwrite with the new version. 
+ * debhelper compat level 4 + * -O6 -> -O3 + * No special g77-3.3/sparc rules + * Remove hppa patch, no longer necessary + + -- Camm Maguire Tue, 13 Jan 2004 17:06:24 +0000 +blas (1.1-12) unstable; urgency=low + + * Build dep on g77 for all but sparc, and g77-3.3 for sparc, Closes: + #199436 + * Declare cblas functions extern C for c++ use, Closes: #200800 + + -- Camm Maguire Tue, 14 Oct 2003 20:06:02 +0000 + +blas (1.1-11) unstable; urgency=low + + * Declare cabs1/dcabs1 volatile to ensure proper precision comparisons + in icamax/izamax, Closes: #185838 + * removed dcabs1 manpage - not part of api + * Added NAME fields to zscal, cscal, and zrotg manpages + * versioned Build-depend on debhelper + * standards update to 3.5.9 + + -- Camm Maguire Sat, 19 Apr 2003 14:37:36 +0000 + +blas (1.1-10) unstable; urgency=low + + * ${g77} added to Depends of blas-dev, Closes: #113086 + + -- Camm Maguire Mon, 28 Oct 2002 18:53:44 -0500 + +blas (1.1-9) unstable; urgency=high + + * blas-dev section is devel + * libblas2.so alternative support + + -- Camm Maguire Sun, 31 Mar 2002 16:26:09 -0500 + +blas (1.1-8) unstable; urgency=high + + * Arch dependent patch mechanism + * Omit error exit tests at package build time from hppa due to loader + bug on this platorm + * README.Debian about the hppa loader bug preventing successful tests + of 'error exits' + + -- Camm Maguire Thu, 28 Mar 2002 00:26:15 -0500 + +blas (1.1-7) unstable; urgency=low + + * CC variable in rules -> GCC + * Changed wrapper prototypes in cblas/testing/c_{s,c}blas1.c to return + double where function originally returned float. 
This is apparently + necessary for C routines to make them correctly callable from g77 + fortran (as alluded to in g77 info manual) + + -- Camm Maguire Fri, 1 Mar 2002 16:02:56 -0500 + +blas (1.1-6) unstable; urgency=low + + * Conflicts/Replaces on versioned atlas2-headers, Closes: #135610 + + -- Camm Maguire Sun, 24 Feb 2002 22:48:07 -0500 + +blas (1.1-5) unstable; urgency=low + + * g77-3.0 for mips and mipsel + + -- Camm Maguire Wed, 20 Feb 2002 17:07:57 -0500 + +blas (1.1-4) unstable; urgency=low + + * Prepend instead of cloberring LD_LIBRARY_PATH, Closes: #134517 + * Conflicts/replaces for blas-dev on atlas2-dev (<= 3.0-8), + Closes: #134559 + + -- Camm Maguire Mon, 18 Feb 2002 11:41:40 -0500 + +blas (1.1-3) unstable; urgency=low + + * Made directories under Debian correctly, Closes: #134326 + + -- Camm Maguire Sun, 17 Feb 2002 11:09:34 -0500 + +blas (1.1-2) unstable; urgency=low + + * ORed construction in shlibs and shlibs.local: libblas 2 + atlas2-base | blas | blas2 + + -- Camm Maguire Fri, 15 Feb 2002 23:43:34 -0500 + +blas (1.1-1) unstable; urgency=low + + * New maintainer + * Renamed package to blas1 to blas + * blas2-dev virtual package + * Incorporated cblas into libblas + * Added tester programs in blas-test package + * Added documentation from website + * Incorporated manpages, blas-doc now built from blas source + * Documented that this is the ref. 
implementation, Closes: #103107 + * Short description lines, Closes: #119949, Closes: #131008 + * Accept NMU re: complex abs, Closes: #101635 + * Accept NMU re: faq referencing internet image, convert to png, + Closes: #65259 + * Short description lines, Closes: #130939 + * Short description lines, Closes: #130973 + * Removing xerbla and lsame manpages from lapack-doc, Closes: #61236 + + -- Camm Maguire Wed, 13 Feb 2002 13:51:44 -0500 + +blas (1.0-9) unstable; urgency=low + + * NMU, with maintainer permission + * Revert to gcc for final shared link, clearing lintian error, and + enabling programs linking against the blas to refrain from expecting + libg2c functions in blas equivalents, such as those provided by + atlas. + * Add z_abs.f and c_abs.f, as well as change crotg.f and zrotg.f to + use them, to make the above possible + + -- Camm Maguire Tue, 21 Aug 2001 14:13:02 -0400 + +blas (1.0-8) unstable; urgency=low + + * NMU, with maintainer permission + * Put virtual package blas2 in dependency field of shlibs file + + -- Camm Maguire Fri, 20 Jul 2001 15:36:34 -0400 + +blas (1.0-7) unstable; urgency=low + + * NMU, with permission from maintainer + * Link with g77 instead of gcc to compile in c_abs et. al. fortran + intrinsics, Closes: #101635 + * Support for blas2 virtual package + * Removed images from faq.html, Closes: #65259 + * Added Section: devel to source section of control, clearing lintian + error + * Todo: linking with g77 puts in static non-fPIC code into shared + library, an unavoidable lintian error + + -- Camm Maguire Tue, 10 Jul 2001 23:02:31 -0400 + +blas (1.0-6) unstable; urgency=low + + * Found right compile options so no longer needs f2c on the alpha + * Changes relating to above should closes: #92289 + + -- James A. Treacy Fri, 30 Mar 2001 11:54:20 -0400 + +blas (1.0-5) unstable; urgency=low + + * Fixed Build-Depends line: closes: #92062 + + -- James A. Treacy Thu, 29 Mar 2001 11:54:20 -0400 + +blas (1.0-4) unstable; urgency=low + + * Grrrr. 
replaced 'alpha' with 'any' in control file + * Add f2c as a dependency for the alpha. closes: #71516, #72356 + * Added Build-Depends line + + -- James A. Treacy Mon, 26 Mar 2001 11:54:20 -0400 + +blas (1.0-3.2) frozen; urgency=low + + * Non-maintainer upload, replaced 'any' with 'alpha' in control file + + -- Camm Maguire Wed, 12 Jan 2000 15:51:47 +0100 + +blas (1.0-3.1) frozen; urgency=low + + * Non-maintainer upload, enables compilation on alpha with g77 2.95.2. + Fortan routines calling cabs1 or abs1 must be compiled with + f2c and then gcc with this (current) version of the compiler. + + -- Camm Maguire Wed, 12 Jan 2000 15:51:47 +0100 + +blas (1.0-3) unstable; urgency=low + + * set the lib version to 2 so current apps don't break + + -- James A. Treacy Fri, 29 Oct 1999 11:54:20 -0400 + +blas (1.0-2) unstable; urgency=low + + * Corrected shlibs file. Fixes Bug #48464 + * Conflicts with old lapack packages. Fixes Bug #46556 + + -- James A. Treacy Wed, 27 Oct 1999 12:19:55 -0400 + +blas (1.0-1) unstable; urgency=low + + * Initial Release. This package separates BLAS from the lapack package. + + -- James A. Treacy Tue, 28 Sep 1999 15:48:36 -0400 + + --- blas-1.1.orig/debian/hppa/patch.old +++ blas-1.1/debian/hppa/patch.old @@ -0,0 +1,168 @@ +diff -ruN blas-1.1.ori/cblas/testing/cin2 blas-1.1/cblas/testing/cin2 +--- blas-1.1.ori/cblas/testing/cin2 Sat Mar 13 03:53:23 1999 ++++ blas-1.1/cblas/testing/cin2 Thu Mar 28 04:46:43 2002 +@@ -2,7 +2,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. 
+ 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N +diff -ruN blas-1.1.ori/cblas/testing/cin3 blas-1.1/cblas/testing/cin3 +--- blas-1.1.ori/cblas/testing/cin3 Sat Mar 13 03:53:24 1999 ++++ blas-1.1/cblas/testing/cin3 Thu Mar 28 04:47:03 2002 +@@ -2,7 +2,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. + 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N +diff -ruN blas-1.1.ori/cblas/testing/din2 blas-1.1/cblas/testing/din2 +--- blas-1.1.ori/cblas/testing/din2 Sat Mar 13 03:53:29 1999 ++++ blas-1.1/cblas/testing/din2 Thu Mar 28 04:46:05 2002 +@@ -2,7 +2,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. + 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N +diff -ruN blas-1.1.ori/cblas/testing/din3 blas-1.1/cblas/testing/din3 +--- blas-1.1.ori/cblas/testing/din3 Sat Mar 13 03:53:30 1999 ++++ blas-1.1/cblas/testing/din3 Thu Mar 28 04:46:25 2002 +@@ -2,7 +2,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. 
+ 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N +diff -ruN blas-1.1.ori/cblas/testing/sin2 blas-1.1/cblas/testing/sin2 +--- blas-1.1.ori/cblas/testing/sin2 Sat Mar 13 03:53:36 1999 ++++ blas-1.1/cblas/testing/sin2 Thu Mar 28 04:44:06 2002 +@@ -2,7 +2,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. + 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N +diff -ruN blas-1.1.ori/cblas/testing/sin3 blas-1.1/cblas/testing/sin3 +--- blas-1.1.ori/cblas/testing/sin3 Sat Mar 13 03:53:37 1999 ++++ blas-1.1/cblas/testing/sin3 Thu Mar 28 04:45:27 2002 +@@ -2,7 +2,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. + 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N +diff -ruN blas-1.1.ori/cblas/testing/zin2 blas-1.1/cblas/testing/zin2 +--- blas-1.1.ori/cblas/testing/zin2 Sat Mar 13 03:53:43 1999 ++++ blas-1.1/cblas/testing/zin2 Thu Mar 28 04:47:24 2002 +@@ -2,7 +2,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. 
+ 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N +diff -ruN blas-1.1.ori/cblas/testing/zin3 blas-1.1/cblas/testing/zin3 +--- blas-1.1.ori/cblas/testing/zin3 Sat Mar 13 03:53:44 1999 ++++ blas-1.1/cblas/testing/zin3 Thu Mar 28 04:51:25 2002 +@@ -2,7 +2,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. + 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N +diff -ruN blas-1.1.ori/test/cblat2d blas-1.1/test/cblat2d +--- blas-1.1.ori/test/cblat2d Thu Apr 26 05:00:00 1990 ++++ blas-1.1/test/cblat2d Thu Mar 28 04:54:22 2002 +@@ -4,7 +4,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N + 0 1 2 3 5 9 VALUES OF N +diff -ruN blas-1.1.ori/test/dblat2d blas-1.1/test/dblat2d +--- blas-1.1.ori/test/dblat2d Thu Apr 26 05:00:00 1990 ++++ blas-1.1/test/dblat2d Thu Mar 28 04:54:22 2002 +@@ -4,7 +4,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N + 0 1 2 3 5 9 VALUES OF N +diff -ruN blas-1.1.ori/test/dblat3d blas-1.1/test/dblat3d +--- blas-1.1.ori/test/dblat3d Thu Apr 26 05:00:00 1990 ++++ blas-1.1/test/dblat3d Thu Mar 28 04:54:23 2002 +@@ -4,7 +4,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 
0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N + 0 1 2 3 5 9 VALUES OF N +diff -ruN blas-1.1.ori/test/sblat2d blas-1.1/test/sblat2d +--- blas-1.1.ori/test/sblat2d Thu Apr 26 05:00:00 1990 ++++ blas-1.1/test/sblat2d Thu Mar 28 04:54:23 2002 +@@ -4,7 +4,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N + 0 1 2 3 5 9 VALUES OF N +diff -ruN blas-1.1.ori/test/sblat3d blas-1.1/test/sblat3d +--- blas-1.1.ori/test/sblat3d Thu Apr 26 05:00:00 1990 ++++ blas-1.1/test/sblat3d Thu Mar 28 04:54:23 2002 +@@ -4,7 +4,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N + 0 1 2 3 5 9 VALUES OF N +diff -ruN blas-1.1.ori/test/zblat2d blas-1.1/test/zblat2d +--- blas-1.1.ori/test/zblat2d Thu Apr 26 05:00:00 1990 ++++ blas-1.1/test/zblat2d Thu Mar 28 04:54:23 2002 +@@ -4,7 +4,7 @@ + -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) + F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. + F LOGICAL FLAG, T TO STOP ON FAILURES. +-T LOGICAL FLAG, T TO TEST ERROR EXITS. ++F LOGICAL FLAG, T TO TEST ERROR EXITS. + 16.0 THRESHOLD VALUE OF TEST RATIO + 6 NUMBER OF VALUES OF N + 0 1 2 3 5 9 VALUES OF N --- blas-1.1.orig/debian/copyright +++ blas-1.1/debian/copyright @@ -0,0 +1,8 @@ +This package was debianized by James A. Treacy on +Tue, 28 Sep 1999 15:48:36 -0400. 
+ +It was downloaded from ftp://ftp.netlib.org/blas/blas.tgz + +Copyright: + +This software is in the public domain --- blas-1.1.orig/debian/blas.files +++ blas-1.1/debian/blas.files @@ -0,0 +1 @@ +usr/lib/*.so.* --- blas-1.1.orig/debian/README.Debian +++ blas-1.1/debian/README.Debian @@ -0,0 +1,51 @@ +BLAS for Debian +---------------------- + +This is the Fortran 77 reference implementation of the BLAS (Basic +Linear Algebra Subroutines) standard found at netlib. The C interface +has been incorporated into the library as well -- see +/usr/share/doc/blas-doc/cinterface.pdf. A few minor changes were made +to the crotg.f and zrotg.f files to use a separate complex absolute +value (c_abs.f and z_abs.f) in place of the Fortran intrinsic. This +is because at the time of this writing, g77 does not supply a PIC +version of libg2c. + +Patches to the tester programs made in the course of development of +atlas have also been incorporated. By and large, the purpose of the +patches is to allow correct error reporting in case of failures in the +tests. + +The testers are dynamically linked, and can thus be used to test any +shared blas-compatible library, such as that provided by the atlas +packages. For example, + +LD_LIBRARY_PATH=/usr/lib ldd /usr/lib/blas/xsblat2 + (should show /usr/lib/libblas.so.2) + +LD_LIBRARY_PATH=/usr/lib /usr/lib/blas/xsblat2 + < /usr/share/doc/blas-doc/examples/sblat2d + +(apt-get install atlas2-base) + +LD_LIBRARY_PATH=/usr/lib/atlas ldd /usr/lib/blas/xsblat2 + (should show /usr/lib/atlas/libblas.so.2) + +LD_LIBRARY_PATH=/usr/lib/atlas /usr/lib/blas/xsblat2 + < /usr/share/doc/blas-doc/examples/sblat2d + +When installing the atlas packages in conjunction with this blas +package, your system should be set up to use the fastest blas library +your system can run by default, i.e. in the absence of any +LD_LIBRARY_PATH environment variable. 
+ + +Architecture Specific Notes: +--------------------------- + +Due to a current loader bug on hppa, the tests on the 'error exits' of +the blas routines have been removed from the suite of tests performed +at compile time, and from the sample tester input files supplied in +the blas-test package. + + -- Camm Maguire , 20020213 + --- blas-1.1.orig/debian/postinst +++ blas-1.1/debian/postinst @@ -0,0 +1,12 @@ +#!/bin/sh + +set -e + +if [ "$1" = "configure" ]; then + ldconfig +fi + +# dh_installdeb will replace this with shell code automatically +# generated by other debhelper scripts. + +#DEBHELPER# --- blas-1.1.orig/debian/blas-doc.doc-base.faq +++ blas-1.1/debian/blas-doc.doc-base.faq @@ -0,0 +1,11 @@ +Document: blas-doc.faq +Title: Debian Blas FAQ +Author: J. Dongarra, et. al. +Abstract: Frequently asked Questions +Section: devel + +Format: HTML +Index: /usr/share/doc/blas-doc/faq.html +Files: /usr/share/doc/blas-doc/faq.html /usr/share/doc/blas-doc/blue.png + + --- blas-1.1.orig/debian/blas-doc.docs +++ blas-1.1/debian/blas-doc.docs @@ -0,0 +1,6 @@ +doc/cinterface.pdf +doc/faq.html +doc/blue.png +doc/blasqr.ps +doc/blas2-paper.ps +doc/blas3-paper.ps --- blas-1.1.orig/debian/blas-test.files +++ blas-1.1/debian/blas-test.files @@ -0,0 +1 @@ +usr/lib/blas --- blas-1.1.orig/debian/blas-dev.files +++ blas-1.1/debian/blas-dev.files @@ -0,0 +1,2 @@ +usr/lib/*.{so,a} +usr/include --- blas-1.1.orig/debian/xscblat1.1 +++ blas-1.1/debian/xscblat1.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xsblat1.1 +++ blas-1.1/debian/xsblat1.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/compat +++ blas-1.1/debian/compat @@ -0,0 +1 @@ +4 --- blas-1.1.orig/debian/blas-test.examples +++ blas-1.1/debian/blas-test.examples @@ -0,0 +1,16 @@ +test/cblat2d +test/cblat3d +test/dblat2d +test/dblat3d +test/sblat2d +test/sblat3d +test/zblat2d +test/zblat3d +cblas/testing/cin2 +cblas/testing/cin3 +cblas/testing/din2 +cblas/testing/din3 +cblas/testing/sin2 
+cblas/testing/sin3 +cblas/testing/zin2 +cblas/testing/zin3 --- blas-1.1.orig/debian/blas-test.1 +++ blas-1.1/debian/blas-test.1 @@ -0,0 +1,71 @@ +.\" Hey, EMACS: -*- nroff -*- +.\" First parameter, NAME, should be all caps +.\" Second parameter, SECTION, should be 1-8, maybe w/ subsection +.\" other parameters are allowed: see man(7), man(1) +.TH BLAS-TEST 1 "January 29, 2002" netlib "Linux Programmer's Manual" +.\" Please adjust this date whenever revising the manpage. +.\" +.\" Some roff macros, for reference: +.\" .nh disable hyphenation +.\" .hy enable hyphenation +.\" .ad l left justify +.\" .ad b justify to both left and right margins +.\" .nf disable filling +.\" .fi enable filling +.\" .br insert line break +.\" .sp insert n+1 empty lines +.\" for manpage-specific macros, see man(7) +.SH NAME +xcblat1 xcblat3 xdblat2 xsblat1 xsblat3 xzblat2 +xcblat2 xdblat1 xdblat3 xsblat2 xzblat1 xzblat3 +xccblat1 xdcblat1 xscblat1 xzcblat1 +xccblat2 xdcblat2 xscblat2 xzcblat2 +xccblat3 xdcblat3 xscblat3 xzcblat3 +\- testing programs for blas +.SH SYNOPSIS +.B x{s,d,c,z}[c]blat1 +.br +.B x{s,d,c,z}[c]blat{2,3} +.RI < " parameter file" +.SH DESCRIPTION +This manual page documents briefly the blas testing programs. +This manual page was written for the Debian GNU/Linux distribution +because the original programs do not have manual pages. +.PP +.\" TeX users may be more comfortable with the \fB\fP and +.\" \fI\fP escape sequences to invoke bold face and italics, +.\" respectively. +.B blas +is a library of Basic Linear Algebra routines, accessed through either +a Fortran and/or a C interface. These routines are categorized into +three levels: level1 referring to vector-vector operations, level2 to +matrix-vector operations, and level3 to matrix-matrix operations. In +addition, all routines are written in four "precisions", single +precision real (s), double precision real (d), single precision +complex (c), and double precision complex (z). 
+.br +The testing binaries examine routines in the level +indicated by the last character in the program name and in the +precision indicated by the second character in the program name. The +stem "blat" in the program name refers to a Fortran interface tester, +whereas a "cblat" stem refers to a C interface tester. +.br +The test programs for levels 2 and 3 read a set of parameters +specifying the problem ranges to examine on standard input. Sample +input files are found in /usr/share/doc/blas-test/examples. Files of +the form {s,d,c,z}in{2,3} are input files for the C interface testing +programs, while files of the form {s,d,c,z}blat{2,3}d are input files +to the Fortran testing programs. +.br +On Debian systems, these programs are dynamically linked against the +blas library. This enables the user to test and compare +alternate versions of the library, such as those provided by +atlas, through the use of the LD_LIBRARY_PATH and LD_PRELOAD +environment variables. One can confirm the versions of the libraries +being tested in a given environment with the aid of the ldd program. +.SH OPTIONS +These programs take no command line options. +.\" .SH SEE ALSO +.SH AUTHOR +This manual page was written by Camm Maguire , +for the Debian GNU/Linux system (but may be used by others). --- blas-1.1.orig/debian/xsblat2.1 +++ blas-1.1/debian/xsblat2.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/blas-dev.postinst +++ blas-1.1/debian/blas-dev.postinst @@ -0,0 +1,15 @@ +#! /bin/sh + +set -e + +update-alternatives --install /usr/lib/libblas-2.so libblas-2.so /usr/lib/libblas.so 20 \ + --slave /usr/lib/libblas-2.a libblas-2.a /usr/lib/libblas.a + +# dh_installdeb will replace this with shell code automatically +# generated by other debhelper scripts. 
+ +#DEBHELPER# + +exit 0 + + --- blas-1.1.orig/debian/xscblat2.1 +++ blas-1.1/debian/xscblat2.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/blas-dev.prerm +++ blas-1.1/debian/blas-dev.prerm @@ -0,0 +1,12 @@ +#!/bin/sh + +set -e + +if [ "$1" != "upgrade" ] +then + update-alternatives --remove libblas-2.so /usr/lib/libblas.so +fi + +#DEBHELPER# + +exit 0 --- blas-1.1.orig/debian/xsblat3.1 +++ blas-1.1/debian/xsblat3.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xscblat3.1 +++ blas-1.1/debian/xscblat3.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xdblat1.1 +++ blas-1.1/debian/xdblat1.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xdcblat1.1 +++ blas-1.1/debian/xdcblat1.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xdblat2.1 +++ blas-1.1/debian/xdblat2.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xdcblat2.1 +++ blas-1.1/debian/xdcblat2.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xdblat3.1 +++ blas-1.1/debian/xdblat3.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xdcblat3.1 +++ blas-1.1/debian/xdcblat3.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xcblat1.1 +++ blas-1.1/debian/xcblat1.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xccblat1.1 +++ blas-1.1/debian/xccblat1.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xcblat2.1 +++ blas-1.1/debian/xcblat2.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xccblat2.1 +++ blas-1.1/debian/xccblat2.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xcblat3.1 +++ blas-1.1/debian/xcblat3.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xccblat3.1 +++ blas-1.1/debian/xccblat3.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xzblat1.1 +++ blas-1.1/debian/xzblat1.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xzcblat1.1 +++ blas-1.1/debian/xzcblat1.1 @@ -0,0 +1 @@ +.so 
man1/blas-test.1 --- blas-1.1.orig/debian/xzblat2.1 +++ blas-1.1/debian/xzblat2.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xzcblat2.1 +++ blas-1.1/debian/xzcblat2.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xzblat3.1 +++ blas-1.1/debian/xzblat3.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/xzcblat3.1 +++ blas-1.1/debian/xzcblat3.1 @@ -0,0 +1 @@ +.so man1/blas-test.1 --- blas-1.1.orig/debian/blas-dev.docs +++ blas-1.1/debian/blas-dev.docs @@ -0,0 +1,2 @@ +debian/README.Debian +debian/test_results --- blas-1.1.orig/debian/blas.docs +++ blas-1.1/debian/blas.docs @@ -0,0 +1,2 @@ +debian/README.Debian +debian/test_results --- blas-1.1.orig/debian/blas-test.docs +++ blas-1.1/debian/blas-test.docs @@ -0,0 +1 @@ +debian/README.Debian