--- clustalx-1.83.orig/debian/clustalx.1
+++ clustalx-1.83/debian/clustalx.1
@@ -0,0 +1,38 @@
+.TH CLUSTALX 1 "2003 July 1" "Manual for clustalx"
+.SH NAME
+clustalx \- a graphical interface for the ClustalW multiple sequence alignment program
+
+.SH SYNOPSIS
+.B clustalx
+.I "[options]"
+.SH "DESCRIPTION"
+This manual page documents briefly the
+.BR clustalx
+command.
+This manual page was written for the Debian GNU/Linux distribution
+because the original program does not have a manual page.
+Instead, it has other documentation; see below.
+.PP
+.B clustalx
+is a windows interface for the ClustalW multiple sequence alignment
+program. It provides an integrated environment for performing multiple sequence
+and profile alignments and analysing the results. The sequence alignment is
+displayed in a window on the screen. A versatile coloring scheme has been
+incorporated allowing you to highlight conserved features  in the alignment.
+The pull-down menus at the top of the window allow you to select all the
+options required for traditional multiple sequence and profile alignment.
+.P
+You can cut-and-paste sequences to change the order of the alignment; you can
+select a subset of sequences to be aligned; you can select a sub-range of the
+alignment to be realigned and inserted back into the original alignment.
+.P
+Alignment quality analysis can be performed and low-scoring segments or
+exceptional residues can be highlighted.
+
+.SH "SEE ALSO"
+Files in /usr/share/doc/clustalx contain a lot
+of details. The Web page <http://www-igbmc.u-strasbg.fr/BioInfo/ClustalW/>
+is also helpful.
+.SH AUTHOR
+This manual page was written by Andreas Tille <tille@debian.org>,
+for the Debian GNU/Linux system (but may be used by others).
--- clustalx-1.83.orig/debian/control
+++ clustalx-1.83/debian/control
@@ -0,0 +1,34 @@
+Source: clustalx
+Section: non-free/science
+Priority: optional
+Maintainer: Debian-Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
+DM-Upload-Allowed: yes
+Uploaders: Steffen Moeller <moeller@debian.org>, Charles Plessy <charles-debian-nospam@plessy.org>
+Build-Depends: debhelper (>= 5), libncbi6-dev, libvibrant6-dev, lesstif2-dev, quilt
+Standards-Version: 3.7.3
+Vcs-Browser: http://svn.debian.org/wsvn/debian-med/trunk/packages/clustalx/trunk/?rev=0&sc=0
+Vcs-Svn: svn://svn.debian.org/svn/debian-med/trunk/packages/clustalx/trunk/
+XS-Autobuild: yes
+Homepage: ftp://ftp.ebi.ac.uk/pub/software/unix/clustalx/
+
+Package: clustalx
+Architecture: any
+Depends: ${shlibs:Depends}
+Suggests: texshade|texlive-latex-extra, boxshade
+Description: GUI for Clustal W
+ This package offers a graphical user interface for the Clustal W multiple
+ sequence alignment program. It provides an integrated environment for
+ performing multiple sequence and profile alignments and analysing the results.
+ The sequence alignment is displayed in a window on the screen.
+ A versatile coloring scheme has been incorporated to highlight conserved
+ features in the alignment. For professional presentations, one should
+ use the texshade LaTeX package or boxshade.
+ .
+ The pull-down menus at the top of the window allow you to select all the
+ options required for traditional multiple sequence and profile alignment.
+ You can cut-and-paste sequences to change the order of the alignment; you can
+ select a subset of sequences to be aligned; you can select a sub-range of the
+ alignment to be realigned and inserted back into the original alignment.
+ .
+ An alignment quality analysis can be performed and low-scoring segments or
+ exceptional residues can be highlighted.
--- clustalx-1.83.orig/debian/clustalx.menu
+++ clustalx-1.83/debian/clustalx.menu
@@ -0,0 +1,5 @@
+?package(clustalx):needs="X11" \
+    section="Applications/Science/Biology" \
+    title="Clustal X" \
+    command="/usr/bin/clustalx"\
+    hints="GUI for clustalw"
--- clustalx-1.83.orig/debian/patches/clustalw.h.patch
+++ clustalx-1.83/debian/patches/clustalw.h.patch
@@ -0,0 +1,13 @@
+Index: clustalw-1.83/clustalw.h
+===================================================================
+--- clustalw-1.83.orig/clustalw.h
++++ clustalw-1.83/clustalw.h
+@@ -238,7 +238,7 @@
+ char *blank_to_(char *str);
+ char *upstr(char *str);
+ char *lowstr(char *str);
+-void getstr(char *instr, char *outstr);
++void getstr(char *instr, int n, char *outstr);
+ double getreal(char *instr, double minx, double maxx, double def);
+ int getint(char *instr, int minx, int maxx, int def);
+ void do_system(void);
--- clustalx-1.83.orig/debian/patches/clustalx.html.patch
+++ clustalx-1.83/debian/patches/clustalx.html.patch
@@ -0,0 +1,2123 @@
+Index: clustalw-1.83/clustalx.html
+===================================================================
+--- clustalw-1.83.orig/clustalx.html
++++ clustalw-1.83/clustalx.html
+@@ -2029,6 +2029,2118 @@
+ <P>
+ Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
+ The ClustalX windows interface: flexible strategies for multiple sequence 
++alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++The ClustalW program is described in the manuscript:
++</STRONG>
++</P>
++<P>
++Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
++sensitivity of progressive multiple sequence alignment through sequence
++weighting, positions-specific gap penalties and weight matrix choice.  Nucleic
++Acids Research, 22:4673-4680.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++The ClustalV program is described in the manuscript:
++</STRONG>
++</P>
++<P>
++Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
++multiple sequence alignment. CABIOS 8,189-191.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++The original Clustal program is described in the manuscripts:
++</STRONG>
++</P>
++<P>
++Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
++alignments on a microcomputer.
++CABIOS 5,151-153.
++</P>
++<P>
++Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
++sequence alignment on a microcomputer. Gene 73,237-244.
++</P>
++<P>
++<STRONG>
++Some tips on using Clustal X:
++</STRONG>
++</P>
++<P>
++Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
++Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
++</P>
++<P>
++<STRONG>
++Some tips on using Clustal W:
++</STRONG>
++</P>
++<P>
++Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
++multiple sequence alignments. Methods Enzymol., 266, 383-402.
++</P>
++<P>
++<STRONG>
++You can get the latest version of the ClustalX program by anonymous ftp to:
++</STRONG>
++</P>
++<P>
++ftp-igbmc.u-strasbg.fr
++ftp.embl-heidelberg.de
++ftp.ebi.ac.uk
++</P>
++<P>
++<STRONG>
++Or, have a look at the following WWW site:
++</STRONG>
++</P>
++<P>
++http://www-igbmc.u-strasbg.fr/BioInfo/
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<HEAD>
++<TITLE>ClustalX Help</TITLE>
++</HEAD>
++<BODY BGCOLOR=white>
++<CENTER><H1>ClustalX Help</H1></CENTER>
++<P>
++You can get the latest version of the ClustalX program here:
++</P>
++<DL><DD>
++<A HREF="ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/">
++ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/</A>
++</DL>
++<P>For full details of usage and algorithms, please read the <A HREF="clustalw.doc"><EM>ClustalW.Doc</EM></A> file.</P>
++<PRE><EM>
++Toby  Gibson                         EMBL, Heidelberg, Germany.
++Des   Higgins                        UCC, Cork, Ireland.
++Julie Thompson/Francois Jeanmougin   IGBMC, Strasbourg, France.
++</EM></PRE>
++<CENTER><H2><A NAME="INDEX">Index</A></H2></CENTER>
++<OL>
++<LI><A HREF="#G">                      General help for CLUSTAL X (1.8)
++</A></LI>
++<LI><A HREF="#F">                      Input / Output Files 
++</A></LI>
++<LI><A HREF="#E">                          Editing Alignments
++</A></LI>
++<LI><A HREF="#M">                          Multiple Alignments
++</A></LI>
++<LI><A HREF="#P">                   Profile and Structure Alignments
++</A></LI>
++<LI><A HREF="#B">            Secondary Structure / Gap Penalty Masks
++</A></LI>
++<LI><A HREF="#T">                            Phylogenetic Trees
++</A></LI>
++<LI><A HREF="#C">                               Colors
++</A></LI>
++<LI><A HREF="#Q">                       Alignment Quality Analysis
++</A></LI>
++<LI><A HREF="#9">              Command Line Parameters
++</A></LI>
++<LI><A HREF="#R">                             References
++</A></LI>
++</OL>
++<CENTER><H2><A NAME="G">                      General help for CLUSTAL X (1.8)
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++Clustal X is a windows interface for the ClustalW multiple sequence alignment
++program. It provides an integrated environment for performing multiple sequence
++and profile alignments and analysing the results. The sequence alignment is
++displayed in a window on the screen. A versatile coloring scheme has been
++incorporated allowing you to highlight conserved features  in the alignment.
++The pull-down menus at the top of the window allow you to select all the
++options required for traditional multiple sequence and profile alignment.
++</P>
++<P>
++You can cut-and-paste sequences to change the order of the alignment; you can
++select a subset of sequences to be aligned; you can select a sub-range of the
++alignment to be realigned and inserted back into the original alignment.
++</P>
++<P>
++Alignment quality analysis can be performed and low-scoring segments or
++exceptional residues can be highlighted.
++</P>
++<P>
++ClustalX is available for a number of different platforms including: SUN
++Solaris, IRIX5.3 on Silicon Graphics, Digital UNIX on DECStations, Microsoft
++Windows (32 bit) for PC's, Linux ELF for x86 PC's and Macintosh PowerMac. (See
++the README file for Installation instructions.)
++</P>
++<P>
++</P>
++<P>
++<H4>
++SEQUENCE INPUT
++</H4>
++</P>
++<P>
++Sequences and profiles (a term for pre-existing alignments) are input using 
++the FILE menu. Invalid options will be disabled. All sequences must be included
++in 1 file. 7 formats are automatically recognised: NBRF/PIR, EMBL/SWISSPROT,
++Pearson (Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9 RSF and GDE flat file.
++All non-alphabetic characters (spaces, digits, punctuation marks) are ignored
++except "-" which is used to indicate a GAP ("." in MSF/RSF).  
++</P>
++<P>
++<H4>
++SEQUENCE / PROFILE ALIGNMENTS
++</H4>
++</P>
++<P>
++Clustal X has two modes which can be selected using the switch directly above
++the sequence display: MULTIPLE ALIGNMENT MODE and PROFILE ALIGNMENT MODE.
++</P>
++<P>
++To do a MULTIPLE ALIGNMENT on a set of sequences, make sure MULTIPLE ALIGNMENT
++MODE is selected. A single sequence data area is then displayed. The ALIGNMENT
++menu then allows you to either produce a guide tree for the alignment, or to do
++a multiple alignment following the guide tree, or to do a full multiple
++alignment.
++</P>
++<P>
++In PROFILE ALIGNMENT MODE, two sequence data areas are displayed, allowing you
++to align 2 alignments (termed profiles). Profiles are also used to add a new
++sequence to an old alignment, or to use secondary structure to guide the
++alignment process. GAPS in the old alignments are indicated using the "-" 
++character. PROFILES can be input in ANY of the allowed formats; just  use "-"
++(or "." for MSF/RSF) for each gap position. In Profile Alignment Mode, a button
++"Lock Scroll" is displayed which allows you to scroll the two profiles together
++using a single scroll bar. When the Lock Scroll is turned off, the two profiles
++can be scrolled independently.
++</P>
++<P>
++<H4>
++PHYLOGENETIC TREES
++</H4>
++</P>
++<P>
++Phylogenetic trees can be calculated from old alignments (read in with "-"
++characters to indicate gaps) OR after a multiple alignment while the alignment
++is still displayed.
++</P>
++<P>
++<H4>
++ALIGNMENT DISPLAY
++</H4>
++</P>
++<P>
++The alignment is displayed on the screen with the sequence names on the left
++hand side. The sequence alignment is for display only, it cannot be edited here
++(except for changing the sequence order by cutting-and-pasting on the sequence
++names). 
++</P>
++<P>
++A ruler is displayed below the sequences, starting at 1 for the first residue
++position (residue numbers in the sequence input file are ignored).
++</P>
++<P>
++A line above the alignment is used to mark strongly conserved positions. Three
++characters ('*', ':' and '.') are used:
++</P>
++<P>
++'*' indicates positions which have a single, fully conserved residue
++</P>
++<P>
++':' indicates that one of the following 'strong' groups is fully conserved:-
++<PRE>
++                 STA  
++                 NEQK  
++                 NHQK  
++                 NDEQ  
++                 QHRK  
++                 MILV  
++                 MILF  
++                 HY  
++                 FYW  
++</PRE>
++</P>
++<P>
++'.' indicates that one of the following 'weaker' groups is fully conserved:-
++<PRE>
++                 CSA  
++                 ATV  
++                 SAG  
++                 STNK  
++                 STPA  
++                 SGND  
++                 SNDEQK  
++                 NDEQHK  
++                 NEQHRK  
++                 FVLIM  
++                 HFY  
++</PRE>
++</P>
++<P>
++These are all the positively scoring groups that occur in the Gonnet Pam250
++matrix. The strong and weak groups are defined as strong score &gt;0.5 and weak
++score &lt;=0.5 respectively.
++</P>
++<P>
++For profile alignments, secondary structure and gap penalty masks are displayed
++above the sequences, if any data is found in the profile input file.
++</P>
++<P>
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="F">                      Input / Output Files 
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++LOAD SEQUENCES reads sequences from one of 7 file formats, replacing any
++sequences that are already loaded. All sequences must be in 1 file. The formats
++that are automatically recognised are: NBRF/PIR, EMBL/SWISSPROT, Pearson
++(Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9/RSF and GDE flat file.  All
++non-alphabetic characters (spaces, digits, punctuation  marks) are ignored
++except "-" which is used to indicate a GAP ("." in MSF/RSF).
++</P>
++<P>
++The program tries to automatically recognise the different file formats used
++and to guess whether the sequences are amino acid or nucleotide.  This is not
++always foolproof.
++</P>
++<P>
++FASTA and NBRF/PIR formats are recognised by having a ">" as the first 
++character in the file.  
++</P>
++<P>
++EMBL/Swiss Prot formats are recognised by the letters "ID" at the start of the
++file (the token for the entry name field).  
++</P>
++<P>
++CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.
++</P>
++<P>
++GCG/MSF format is recognised by one of the following:
++<UL>
++<LI>
++       - the word PileUp at the start of the file.
++</LI><LI>
++       - the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
++         at the start of the file.
++</LI><LI>
++       - the word MSF on the first line of the file, and the characters ..
++         at the end of this line.
++</LI>
++</UL>
++</P>
++<P> 
++GCG/RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of
++the file.
++</P>
++<P>
++</P>
++<P>
++If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the
++sequence will be assumed to be nucleotide.  This works in 97.3% of cases but
++watch out!
++</P>
++<P>
++APPEND SEQUENCES is only valid in MULTIPLE ALIGNMENT MODE. The input sequences
++do not replace those already loaded, but are appended at the end of the
++alignment.
++</P>
++<P>
++SAVE SEQUENCES AS... offers the user a choice of one of six output formats:
++CLUSTAL, NBRF/PIR, GCG/MSF, PHYLIP, NEXUS or GDE. All sequences are written
++to a single file. Options are available to save a range of the alignment, 
++switch between UPPER/LOWER case for GDE files, and to output SEQUENCE NUMBERING
++for CLUSTAL files.
++</P>
++<P>
++LOAD PROFILE 1 reads sequences in the same 7 file formats, replacing any
++sequences already loaded as Profile 1. This option will also remove any
++sequences which are loaded in Profile 2.
++</P>
++<P>
++LOAD PROFILE 2 reads sequences in the same 7 file formats, replacing any
++sequences already loaded as Profile 2.
++</P>
++<P>
++SAVE PROFILE 1 AS... is similar to the Save Sequences option except that only
++those sequences in Profile 1 will be written to the output file.
++</P>
++<P>
++SAVE PROFILE 2 AS... is similar to the Save Sequences option except that only
++those sequences in Profile 2 will be written to the output file.
++</P>
++<P>
++WRITE ALIGNMENT AS POSTSCRIPT will write the sequence display to a postscript
++format file. This will include any secondary structure / gap penalty mask 
++information and the consensus and ruler lines which are displayed on the
++screen. The Alignment Quality curve can be optionally included in the output
++file.
++</P>
++<P>
++WRITE PROFILE 1 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
++except that only the profile 1 display will be printed.
++</P>
++<P>
++WRITE PROFILE 2 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
++except that only the profile 2 display will be printed.
++</P>
++<P>
++</P>
++<P>
++<H4>
++POSTSCRIPT PARAMETERS
++</H4>
++</P>
++<P>
++A number of options are available to allow you to configure your postscript
++output file.
++</P>
++<P>
++PS COLORS FILE:
++</P>
++<P>
++The exact RGB values required to reproduce the colors used in the alignment
++window will vary from printer to printer. A PS colors file can be specified
++that contains the RGB values for all the colors required by each of your
++postscript printers.
++</P>
++<P>
++By default, Clustal X looks for a file called 'colprint.par' in the current
++directory (if you're running under UNIX, it then looks in your home directory,
++and finally in the directories in your PATH environment variable). If no PS
++colors file is found or a color used on the screen is not defined here, the
++screen RGB values (from the Color Parameter File) are used.
++</P>
++<P>
++The PS colors file consists of one line for each color to be defined, with the
++color name followed by the RGB values (on a scale of 0 to 1). For example,
++</P>
++<P>
++RED          0.9 0.1 0.1
++</P>
++<P>
++Blank lines and comments (lines beginning with a '#' character) are ignored.
++</P>
++<P>
++</P>
++<P>
++PAGE SIZE:  The alignment can be displayed on either A4, A3 or US Letter size
++pages.
++</P>
++<P>
++ORIENTATION: The alignment can be displayed on either a landscape or portrait
++page.
++</P>
++<P>
++PRINT HEADER: An optional header including the postscript filename, and
++creation date can be printed at the top of each page.
++</P>
++<P>
++PRINT QUALITY CURVE: The Alignment Quality curve which is displayed underneath
++the alignment on the screen can be included in the postscript output.
++</P>
++<P>
++PRINT RULER: The ruler which is displayed underneath the alignment on the 
++screen can be included in the postscript output.
++</P>
++<P>
++PRINT RESIDUE NUMBERS: Sequence residue numbers can be printed at the right
++hand side of the alignment.
++</P>
++<P>
++RESIZE TO FIT PAGE: By default, the alignment is scaled to fit the page size
++selected. This option can be turned off, in which case a font size of 10 will
++be used for the sequences.
++</P>
++<P>
++PRINT FROM POSITION/TO: A range of the alignment can be printed. The default
++is to print the full alignment. The first and last residues to be printed are
++specified here.
++</P>
++<P>
++USE BLOCK LENGTH: The alignment can be divided into blocks of residues. The
++number of residues in a block is specified here. More than one block may then
++be printed on a single page. This is useful for long alignments of a small
++number of sequences. If the block length is set to 0, the alignment will not
++be divided into blocks, but printed across a number of pages.
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="E">                          Editing Alignments
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++Clustal X allows you to change the order of the sequences in the alignment, by
++cutting-and-pasting the sequence names.
++</P>
++<P>
++To select a group of sequences to be moved, click on a sequence name and drag
++the cursor until all the required sequences are highlighted. Holding down the
++Shift key when clicking on the first name will add new sequences to those
++already selected.
++</P>
++<P>
++(Options are provided to Select All Sequences, Select Profile 1 or Select 
++Profile 2.)
++</P>
++<P>
++The selected sequences can be removed from the alignment by using the EDIT
++menu, CUT option.
++</P>
++<P>
++To add the cut sequences back into an alignment, select a sequence by clicking
++on the sequence name. The cut sequences will be added to the alignment,
++immediately following the selected sequence, by the EDIT menu, PASTE option.
++</P>
++<P>
++To add the cut sequences to an empty alignment (eg. when cutting sequences from
++Profile 1 and pasting them to Profile 2), click on the empty sequence name
++display area, and select the EDIT menu, PASTE option as before.
++</P>
++<P>
++The sequence selection and sequence range selection can be cleared using the
++EDIT menu, CLEAR SEQUENCE SELECTION and CLEAR RANGE SELECTION options
++respectively.
++</P>
++<P>
++To search for a string of residues in the sequences, select the sequences to be
++searched by clicking on the sequence names. You can then enter the string to
++search for by selecting the SEARCH FOR STRING option. If the string is found in
++any of the sequences selected, the sequence name and column number are printed
++below the sequence display.
++</P>
++<P>
++In PROFILE ALIGNMENT MODE, the two profiles can be merged (normally done after
++alignment) by selecting ADD PROFILE 2 TO PROFILE 1. The sequences currently
++displayed as Profile 2 will be appended to Profile 1. 
++</P>
++<P>
++The REMOVE ALL GAPS option will remove all gaps from the sequences currently
++selected.
++WARNING: This option removes ALL gaps, not only those introduced by ClustalX,
++but also those that were read from the input alignment file. Any secondary
++structure information associated with the alignment will NOT be automatically
++realigned.
++</P>
++<P>
++The REMOVE GAP-ONLY COLUMNS will remove those positions in the alignment which
++contain gaps in all sequences. This can occur as a result of removing divergent
++sequences from an alignment, or if an alignment has been realigned.
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="M">                          Multiple Alignments
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++Make sure MULTIPLE ALIGNMENT MODE is selected, using the switch directly above
++the sequence display area. Then, use the ALIGNMENT menu to do multiple
++alignments.
++</P>
++<P>
++Multiple alignments are carried out in 3 stages:
++</P>
++<P> 
++1) all sequences are compared to each other (pairwise alignments);
++</P>
++<P> 
++2) a dendrogram (like a phylogenetic tree) is constructed, describing the
++approximate groupings of the sequences by similarity (stored in a file).
++</P>
++<P> 
++3) the final multiple alignment is carried out, using the dendrogram as a guide.
++</P>
++<P>
++The 3 stages are carried out automatically by the DO COMPLETE ALIGNMENT option.
++You can skip the first stages (pairwise alignments; guide tree) by using an old
++guide tree file (DO ALIGNMENT FROM GUIDE TREE); or you can just produce the
++guide tree with no final multiple alignment (PRODUCE GUIDE TREE ONLY).
++</P>
++<P>
++</P>
++<P>
++REALIGN SELECTED SEQUENCES is used to realign badly aligned sequences in the
++alignment. Sequences can be selected by clicking on the sequence names - see
++Editing Alignments for more details. The unselected sequences are then 'fixed'
++and a profile is made including only the unselected sequences. Each of the
++selected sequences in turn is then realigned to this profile. The realigned
++sequences will be displayed as a group at the end of the alignment.
++</P>
++<P>
++</P>
++<P>
++REALIGN SELECTED SEQUENCE RANGE is used to realign a small region of the 
++alignment. A residue range can be selected by clicking on the sequence display
++area. A multiple alignment is then performed, following the 3 stages described
++above, but only using the selected residue range. Finally the new alignment of
++the range is pasted back into the full sequence alignment.
++</P>
++<P>
++By default, gap penalties are used at each end of the subrange in order to 
++penalise terminal gaps. If the REALIGN SEGMENT END GAP PENALTIES option is
++switched off, gaps can be introduced at the ends of the residue range at no
++cost.
++</P>
++<P>
++</P>
++<P>
++ALIGNMENT PARAMETERS displays a sub-menu with the following options:
++</P>
++<P>
++RESET NEW GAPS BEFORE ALIGNMENT will remove any new gaps introduced into the
++sequences during multiple alignment if you wish to change the parameters and
++try again. This only takes effect just before you do a second multiple
++alignment. You can make phylogenetic trees after alignment whether or not this
++is ON. If you turn this OFF, the new gaps are kept even if you do a second
++multiple alignment. This allows you to iterate the alignment gradually.
++Sometimes, the alignment is improved by a second or third pass.
++</P>
++<P>
++RESET ALL GAPS BEFORE ALIGNMENT will remove all gaps in the sequences including
++gaps which were read in from the sequence input file. This only takes effect
++just before you do a second multiple alignment.  You can make phylogenetic
++trees after alignment whether or not this is ON.  If you turn this OFF, all
++gaps are kept even if you do a second multiple alignment. This allows you to
++iterate the alignment gradually.  Sometimes, the alignment is improved by a
++second or third pass.
++</P>
++<P>
++</P>
++<P>
++PAIRWISE ALIGNMENT PARAMETERS control the speed/sensitivity of the initial
++alignments.
++</P>
++<P>
++MULTIPLE ALIGNMENT PARAMETERS control the gaps in the final multiple
++alignments.
++</P>
++<P>
++PROTEIN GAP PARAMETERS displays a temporary window which allows you to set
++various parameters only used in the alignment of protein sequences.
++</P>
++<P>
++(SECONDARY STRUCTURE PARAMETERS, for use with the Profile Alignment Mode only,
++allows you to set various parameters only used with gap penalty masks.)
++</P>
++<P>
++SAVE LOG FILE will write the alignment calculation scores to a file. The log
++filename is the same as the input sequence filename, with an extension .log
++appended.
++</P>
++<P>
++</P>
++<P>
++<H4>
++OUTPUT FORMAT OPTIONS
++</H4>
++</P>
++<P>
++You can choose from 6 different alignment formats (CLUSTAL, GCG, NBRF/PIR,
++PHYLIP, GDE and NEXUS).  You can choose more than one (or all 6 if you wish).  
++</P>
++<P>
++CLUSTAL format output is a self explanatory alignment format. It shows the
++sequences aligned in blocks. It can be read in again at a later date to (for
++example) calculate a phylogenetic tree or add in new sequences by profile
++alignment.
++</P>
++<P>
++GCG output can be used by any of the GCG programs that can work on multiple
++alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN). It is the same as the GCG
++.msf format files (multiple sequence file); new in version 7 of GCG.
++</P>
++<P>
++NEXUS format is used by several phylogeny programs, including PAUP and
++MacClade.
++</P>
++<P>
++PHYLIP format output can be used for input to the PHYLIP package of Joe 
++Felsenstein.  This is a very widely used package for doing every imaginable
++form of phylogenetic analysis (MUCH more than the modest introduction
++offered by this program).
++</P>
++<P>
++NBRF/PIR: this is the same as the standard PIR format with ONE ADDITION. Gap
++characters "-" are used to indicate the positions of gaps in the multiple 
++alignment. These files can be re-used as input in any part of clustal that
++allows sequences (or alignments or profiles) to be read in.  
++</P>
++<P>
++GDE:  this format is used by the GDE package of Steven Smith and is understood
++by SEQLAB in GCG 9 or later.
++</P>
++<P>
++GDE OUTPUT CASE: sequences in GDE format may be written in either upper or
++lower case.
++</P>
++<P> 
++CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the
++alignment lines in clustalw format.
++</P>
++<P>
++OUTPUT ORDER is used to control the order of the sequences in the output
++alignments. By default, it uses the order in which the sequences were aligned
++(from the guide tree/dendrogram), thus automatically grouping closely related
++sequences. It can be switched to be the same as the original input order.
++</P>
++<P>
++PARAMETER OUTPUT: This option will save all your parameter settings in a
++parameter file (suffix .par) during alignment. The file can be subsequently
++used to rerun ClustalW using the same parameters.
++</P>
++<P>
++</P>
++<P>
++<H3>
++ALIGNMENT PARAMETERS
++</H3>
++</P>
++<P>
++<STRONG>
++PAIRWISE ALIGNMENT PARAMETERS
++</STRONG>
++</P>
++<P>
++A distance is calculated between every pair of sequences and these are used to
++construct the phylogenetic tree which guides the final multiple alignment. The
++scores are calculated from separate pairwise alignments. These can be
++calculated using 2 methods: dynamic programming (slow but accurate) or by the
++method of Wilbur and Lipman (extremely fast but approximate).   
++</P>
++<P>
++You can choose between the 2 alignment methods using the PAIRWISE ALIGNMENTS
++option. The slow/accurate method is fast enough for short sequences but will be
++VERY SLOW for many (e.g. >100) long (e.g. >1000 residue) sequences.   
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++SLOW-ACCURATE alignment parameters:
++</STRONG>
++</P>
++<P>
++These parameters do not have any effect on the speed of the alignments. They
++are used to give initial alignments which are then rescored to give percent
++identity scores. These % scores are the ones which are displayed on the 
++screen. The scores are converted to distances for the trees.
++</P>
++<P>
++Gap Open Penalty:      the penalty for opening a gap in the alignment.
++</P>
++<P>
++Gap Extension Penalty: the penalty for extending a gap by 1 residue.
++</P>
++<P>
++Protein Weight Matrix: the scoring table which describes the similarity of 
++each amino acid to each other.
++</P>
++<P>
++Load protein matrix: allows you to read in a comparison table from a file.
++</P>
++<P>
++DNA weight matrix: the scores assigned to matches and mismatches (including
++IUB ambiguity codes).
++</P>
++<P>
++Load DNA matrix: allows you to read in a comparison table from a file.
++</P>
++<P>
++See the Multiple alignment parameters, MATRIX option below for details of the
++matrix input format.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++FAST-APPROXIMATE alignment parameters:
++</STRONG>
++</P>
++<P>
++These similarity scores are calculated from fast, approximate, global
++alignments, which are controlled by 4 parameters. 2 techniques are used to make
++these alignments very fast: 1) only exactly matching fragments (k-tuples) are
++considered; 2) only the 'best' diagonals (the ones with most k-tuple matches)
++are used.
++</P>
++<P>
++GAP PENALTY:   This is a penalty for each gap in the fast alignments. It has
++little effect on the speed or sensitivity except for extreme values.
++</P>
++<P>
++K-TUPLE SIZE:  This is the size of exactly matching fragment that is used. 
++INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity.
++For longer sequences (e.g. >1000 residues) you may wish to increase the
++default.
++</P>
++<P>
++TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary
++dot-matrix plot) is calculated. Only the best ones (with most matches) are used
++in the alignment. This parameter specifies how many. Decrease for speed;
++increase for sensitivity.
++</P>
++<P>
++WINDOW SIZE:  This is the number of diagonals around each of the 'best' 
++diagonals that will be used. Decrease for speed; increase for sensitivity.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++MULTIPLE ALIGNMENT PARAMETERS
++</STRONG>
++</P>
++<P>
++These parameters control the final multiple alignment. This is the core of the
++program and the details are complicated. To fully understand the use of the
++parameters and the scoring system, you will have to refer to the documentation.
++</P>
++<P>
++Each step in the final multiple alignment consists of aligning two alignments 
++or sequences. This is done progressively, following the branching order in the
++GUIDE TREE. The basic parameters to control this are two gap penalties and the
++scores for various identical/non-identical residues. 
++</P>
++<P>
++The GAP OPENING and EXTENSION PENALTIES can be set here. These control the 
++cost of opening up every new gap and the cost of every item in a gap.  
++Increasing the gap opening penalty will make gaps less frequent. Increasing 
++the gap extension penalty will make gaps shorter. Terminal gaps are not 
++penalised.
++</P>
++<P>
++The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most distantly
++related sequences until after the most closely related sequences have  been
++aligned. The setting shows the percent identity level required to delay the
++addition of a sequence; sequences that are less identical than this level to
++any other sequences will be aligned later.
++</P>
++<P>
++The TRANSITION WEIGHT gives transitions (A<-->G or C<-->T i.e. purine-purine or
++pyrimidine-pyrimidine substitutions) a weight between 0 and 1; a weight of zero
++means that the transitions are scored as mismatches, while a weight of 1 gives
++the transitions the match score. For distantly related DNA sequences, the
++weight should be near to zero; for closely related sequences it can be useful
++to assign a higher score. The default is set to 0.5.
++</P>
++<P>
++</P>
++<P>
++The PROTEIN WEIGHT MATRIX option allows you to choose a series of weight
++matrices. For protein alignments, you use a weight matrix to determine the
++similarity of non-identical amino acids. For example, Tyr aligned with Phe is
++usually judged to be 'better' than Tyr aligned with Pro.
++</P>
++<P>
++There are three 'in-built' series of weight matrices offered. Each consists of
++several matrices which work differently at different evolutionary distances. To
++see the exact details, read the documentation. Crudely, we store several
++matrices in memory, spanning the full range of amino acid distance (from almost
++identical sequences to highly divergent ones). For very similar sequences, it
++is best to use a strict weight matrix which only gives a high score to
++identities and the most favoured conservative substitutions. For more divergent
++sequences, it is appropriate to use "softer" matrices which give a high score
++to many other frequent substitutions.
++</P>
++<P>
++1) BLOSUM (Henikoff). These matrices appear to be the best available for 
++carrying out data base similarity (homology searches). The matrices currently
++used are: Blosum 80, 62, 45 and 30. BLOSUM was the default in earlier Clustal X
++versions.
++</P>
++<P>
++2) PAM (Dayhoff). These have been extremely widely used since the late '70s. We
++currently use the PAM 20, 60, 120, 350 matrices.
++</P>
++<P>
++3) GONNET. These matrices were derived using almost the same procedure as the
++Dayhoff one (above) but are much more up to date and are based on a far larger
++data set. They appear to be more sensitive than the Dayhoff series. We
++currently use the GONNET 80, 120, 160, 250 and 350 matrices. This series is the
++default for Clustal X version 1.8.
++</P>
++<P>
++We also supply an identity matrix which gives a score of 10 to two identical 
++amino acids and a score of zero otherwise. This matrix is not very useful.
++</P>
++<P>
++Load protein matrix: allows you to read in a comparison matrix from a file.
++This can be either a single matrix or a series of matrices (see below for
++format). 
++</P>
++<P>
++</P>
++<P>
++The DNA WEIGHT MATRIX option allows you to select a single matrix (not a series)
++used for aligning nucleic acid sequences. Two hard-coded matrices are available:
++</P>
++<P>
++1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
++of nucleic acid sequences. X's and N's are treated as matches to any IUB
++ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.
++</P>
++<P>
++2) CLUSTALW(1.6). A previous system used by ClustalW, in which matches score
++1.0 and mismatches score 0. All matches for IUB symbols also score 0.
++</P>
++<P>
++Load DNA matrix: allows you to read in a nucleic acid comparison matrix from a
++file (just one matrix, not a series).
++</P>
++<P>
++</P>
++<P>
++SINGLE MATRIX INPUT FORMAT
++The format used for a single matrix is the same as the BLAST program. The
++scores in the new weight matrix should be similarities. You can use negative as
++well as positive values if you wish, although the matrix will be automatically
++adjusted to all positive scores, unless the NEGATIVE MATRIX option is selected.
++Any lines beginning with a # character are assumed to be comments. The first
++non-comment line should contain a list of amino acids in any order, using the 1
++letter code, followed by a * character. This should be followed by a square
++matrix of scores, with one row and one column for each amino acid. The last row
++and column of the matrix (corresponding to the * character) contain the minimum
++score over the whole matrix.
++</P>
++<P>
++MATRIX SERIES INPUT FORMAT
++ClustalX uses different matrices depending on the mean percent identity of the
++sequences to be aligned. You can specify a series of matrices and the range of
++the percent identity for each matrix in a matrix series file. The file is
++automatically recognised by the word CLUSTAL_SERIES at the beginning of the
++file. Each matrix in the series is then specified on one line which should
++start with the word MATRIX. This is followed by the lower and upper limits of
++the sequence percent identities for which you want to apply the matrix. The
++final entry on the matrix line is the filename of a Blast format matrix file
++(see above for details of the single matrix file format).
++</P>
++<P>
++Example.
++</P>
++<P>
++CLUSTAL_SERIES
++</P>
++<P> 
++MATRIX 81 100 /us1/user/julie/matrices/blosum80<BR>
++MATRIX 61 80 /us1/user/julie/matrices/blosum62<BR>
++MATRIX 31 60 /us1/user/julie/matrices/blosum45<BR>
++MATRIX 0 30 /us1/user/julie/matrices/blosum30
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++PROTEIN GAP PARAMETERS
++</STRONG>
++</P>
++<P>
++RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce or
++increase the gap opening penalties at each position in the alignment or 
++sequence. See the documentation for details. As an example, positions that are
++rich in glycine are more likely to have an adjacent gap than positions that are
++rich in valine.
++</P>
++<P>
++HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within a
++run (5 or more residues) of hydrophilic amino acids; these are likely to be
++loop or random coil regions where gaps are more common. The residues that are
++"considered" to be hydrophilic can be entered in HYDROPHILIC RESIDUES.
++</P>
++<P>
++GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too close
++to each other. Gaps that are less than this distance apart are penalised more
++than other gaps. This does not prevent close gaps; it makes them less frequent,
++promoting a block-like appearance of the alignment.
++</P>
++<P>
++END GAP SEPARATION treats end gaps just like internal gaps for the purposes of
++avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above). If you
++turn this off, end gaps will be ignored for this purpose. This is useful when
++you wish to align fragments where the end gaps are not biologically meaningful.
++</P>
++<P>
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="P">                   Profile and Structure Alignments
++</A></H2></CENTER>
++<P>
++</P>
++<P>   
++By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile 
++alignments allow you to store alignments of your favourite sequences and add
++new sequences to them in small bunches at a time. A profile is simply an
++alignment of one or more sequences (e.g. an alignment output file from Clustal
++X). Each input can be a single sequence. One or both sets of input sequences
++may include secondary structure assignments or gap penalty masks to guide the
++alignment. 
++</P>
++<P>
++Make sure PROFILE ALIGNMENT MODE is selected, using the switch directly above
++the sequence display area. Then, use the ALIGNMENT menu to do profile and
++secondary structure alignments.
++</P>
++<P>
++The profiles can be in any of the allowed input formats with "-" characters
++used to specify gaps (except for GCG/MSF where "." is used).
++</P>
++<P>
++You have to load the 2 profiles by choosing FILE, LOAD PROFILE 1 and  LOAD
++PROFILE 2. Then ALIGNMENT, ALIGN PROFILE 2 TO PROFILE 1 will align the 2
++profiles to each other. Secondary structure masks in either profile can be used
++to guide the alignment. This option compares all the sequences in profile 1
++with all the sequences in profile 2 in order to build guide trees which will be
++used to calculate sequence weights, and select appropriate alignment parameters
++for the final profile alignment.
++</P>
++<P>
++You can skip the first stage (pairwise alignments; guide trees) by using old
++guide tree files (ALIGN PROFILES FROM GUIDE TREES). 
++</P>
++<P>
++The ALIGN SEQUENCES TO PROFILE 1 option will take the sequences in the second
++profile and align them to the first profile, 1 at a time.  This is useful to
++add some new sequences to an existing alignment, or to align a set of sequences
++to a known structure. In this case, the second profile set need not be
++pre-aligned.
++</P>
++<P>
++You can skip the first stage (pairwise alignments; guide tree) by using an old
++guide tree file (ALIGN SEQUENCES TO PROFILE 1 FROM TREE). 
++</P>
++<P>
++SAVE LOG FILE will write the alignment calculation scores to a file. The log
++filename is the same as the input sequence filename, with an extension .log
++appended.
++</P>
++<P>
++The alignment parameters can be set using the ALIGNMENT PARAMETERS menu,
++Pairwise Parameters, Multiple Parameters and Protein Gap Parameters options.
++These are EXACTLY the same parameters as used by the general, automatic
++multiple alignment procedure. The general multiple alignment procedure is
++simply a series of profile alignments. Carrying out a series of profile
++alignments on larger and larger groups of sequences, allows you to manually
++build up a complete alignment, if necessary editing intermediate alignments.
++</P>
++<P>
++<STRONG>
++SECONDARY STRUCTURE PARAMETERS
++</STRONG>
++</P>
++<P>
++Use this menu to set secondary structure options. If a solved structure is
++known, it can be used to guide the alignment by raising gap penalties within
++secondary structure elements, so that gaps will preferentially be inserted into
++unstructured surface loop regions. Alternatively, a user-specified gap penalty
++mask can be supplied for a similar purpose.
++</P>
++<P>
++A gap penalty mask is a series of numbers between 1 and 9, one per position in 
++the alignment. Each number specifies how much the gap opening penalty is to be 
++raised at that position (raised by multiplying the basic gap opening penalty
++by the number) i.e. a mask figure of 1 at a position means no change
++in gap opening penalty; a figure of 4 means that the gap opening penalty is
++four times greater at that position, making gaps 4 times harder to open.
++</P>
++<P>
++The format for gap penalty masks and secondary structure masks is explained in
++a separate help section.
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="B">            Secondary Structure / Gap Penalty Masks
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++The use of secondary structure-based penalties has been shown to improve  the
++accuracy of sequence alignment. Clustal X now allows secondary structure/ gap
++penalty masks to be supplied with the input sequences used during profile
++alignment. (NB. The secondary structure information is NOT used during multiple
++sequence alignment). The masks work by raising gap penalties in specified
++regions (typically secondary structure elements) so that gaps are
++preferentially opened in the less well conserved regions (typically surface
++loops).
++</P>
++<P>
++The USE PROFILE 1(2) SECONDARY STRUCTURE / GAP PENALTY MASK options control
++whether the input 2D-structure information or gap penalty masks will be used
++during the profile alignment.
++</P>
++<P>
++The OUTPUT options control whether the secondary structure and gap penalty
++masks should be included in the Clustal X output alignments. Showing both is
++useful for understanding how the masks work. The 2D-structure information is
++itself useful in judging the alignment quality and in seeing how residue
++conservation patterns vary with secondary structure. 
++</P>
++<P>
++The HELIX and STRAND GAP PENALTY options provide the value for raising the gap
++penalty at core Alpha Helical (A) and Beta Strand (B) residues. In CLUSTAL
++format, capital residues denote the A and B core structure notation. Basic gap
++penalties are multiplied by the amount specified.
++</P>
++<P>
++The LOOP GAP PENALTY option provides the value for the gap penalty in Loops.
++By default this penalty is not raised. In CLUSTAL format, loops are specified
++by "." in the secondary structure notation.
++</P>
++<P>
++The SECONDARY STRUCTURE TERMINAL PENALTY provides the value for setting the gap
++penalty at the ends of secondary structures. Ends of secondary structures are
++known to grow or shrink, comparing related structures. Therefore by default
++these are given intermediate values, lower than the core penalties. All
++secondary structure read in as lower case in CLUSTAL format gets the reduced
++terminal penalty.
++</P>
++<P>
++The HELIX and STRAND TERMINAL POSITIONS options specify the range of structure
++termini for the intermediate penalties. In the alignment output, these are
++indicated as lower case. For Alpha Helices, by default, the range spans the 
++end-helical turn (3 residues). For Beta Strands, the default range spans the
++end residue and the adjacent loop residue, since sequence conservation often
++extends beyond the actual H-bonded Beta Strand.
++</P>
++<P>
++Clustal X can read the masks from SWISS-PROT, CLUSTAL or GDE format input
++files. For many 3-D protein structures, secondary structure information is
++recorded in the feature tables of SWISS-PROT database entries. You should
++always check that the assignments are correct - some are quite inaccurate.
++Clustal X looks for SWISS-PROT HELIX and STRAND assignments e.g.
++</P>
++<P>
++</P>
++<P>
++<PRE>
++FT   HELIX       100    115
++FT   STRAND      118    119
++</PRE>
++</P>
++<P>
++The structure and penalty masks can also be read from CLUSTAL alignment format 
++as comment lines beginning "!SS_" or "!GM_" e.g.
++</P>
++<P>
++<PRE>
++!SS_HBA_HUMA    ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA
++!GM_HBA_HUMA    112224444444444222122244444444442222224222111111111222444444
++HBA_HUMA        VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
++</PRE>
++</P>
++<P>
++Note that the mask itself is a set of numbers between 1 and 9 each of which is 
++assigned to the residue(s) in the same column below. 
++</P>
++<P>
++In GDE flat file format, the masks are specified as text and the names must
++begin with "SS_ or "GM_.
++</P>
++<P>
++Either a structure or penalty mask or both may be used. If both are included
++in an alignment, the user will be asked which is to be used.
++</P>
++<P>
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="T">                            Phylogenetic Trees
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++Before calculating a tree, you must have an ALIGNMENT in memory. This can be
++input using the FILE menu, LOAD SEQUENCES option or you should have just
++carried out a full multiple alignment and the alignment is still in memory.
++Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!!
++</P>
++<P>
++The method used is the NJ (Neighbour Joining) method of Saitou and Nei. First
++you calculate distances (percent divergence) between all pairs of sequences from
++a multiple alignment; second you apply the NJ method to the distance matrix.
++</P>
++<P>
++To calculate a tree, use the DRAW N-J TREE option. This gives an UNROOTED tree
++and all branch lengths. The root of the tree can only be inferred by using an
++outgroup (a sequence that you are certain branches at the outside of the tree
++.... certain on biological grounds) OR if you assume a degree of constancy in
++the 'molecular clock', you can place the root in the 'middle' of the tree
++(roughly equidistant from all tips).
++</P>
++<P>
++BOOTSTRAP N-J TREE uses a method for deriving confidence values for the 
++groupings in a tree (first adapted for trees by Joe Felsenstein). It involves
++making N random samples of sites from the alignment (N should be LARGE, e.g.
++500 - 1000); drawing N trees (1 from each sample) and counting how many times
++each grouping from the original tree occurs in the sample trees. You can set N
++using the NUMBER OF BOOTSTRAP TRIALS option in the BOOTSTRAP TREE window. In
++practice, you should use a large number of bootstrap replicates (1000 is
++recommended, even if it means running the program for an hour on a slow 
++computer). You can also supply a seed number for the random number generator
++here. Different runs with the same seed will give the same answer. See the
++documentation for more details.
++</P>
++<P>
++EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where
++ANY of the sequences have a gap will be ignored. This means that 'like' will
++be compared to 'like' in all distances, which is highly desirable. It also
++automatically throws away the most ambiguous parts of the alignment, which are
++concentrated around gaps (usually). The disadvantage is that you may throw away
++much of the data if there are many gaps (which is why it is difficult for us to
++make it the default).  
++</P>
++<P>
++CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this option
++makes no difference. For greater divergence, this option corrects for the fact
++that observed distances underestimate actual evolutionary distances. This is
++because, as sequences diverge, more than one substitution will happen at many
++sites. However, you only see one difference when you look at the present day
++sequences. Therefore, this option has the effect of stretching branch lengths
++in trees (especially long branches). The corrections used here (for DNA or
++proteins) are both due to Motoo Kimura. See the documentation for details.  
++</P>
++<P>
++Where possible, this option should be used. However, for VERY divergent
++sequences, the distances cannot be reliably corrected. You will be warned if
++this happens. Even if none of the distances in a data set exceed the reliable
++threshold, if you bootstrap the data, some of the bootstrap distances may
++randomly exceed the safe limit.  
++</P>
++<P>
++SAVE LOG FILE will write the tree calculation scores to a file. The log
++filename is the same as the input sequence filename, with an extension .log
++appended.
++</P>
++<P>
++<H4>
++OUTPUT FORMAT OPTIONS
++</H4>
++</P>
++<P>
++Four different formats are allowed. None of these displays the tree visually.
++You can display the tree using the NJPLOT program distributed with Clustal X
++OR get the PHYLIP package and use the tree drawing facilities there. 
++</P>
++<P> 
++1) CLUSTAL FORMAT TREE. This format is verbose and lists all of the distances
++between the sequences and the number of alignment positions used for each. The
++tree is described at the end of the file. It lists the sequences that are
++joined at each alignment step and the branch lengths. After two sequences are
++joined, they are referred to later as a NODE. The number of a NODE is the number
++of the lowest sequence in that NODE.   
++</P>
++<P>
++2) PHYLIP FORMAT TREE. This format is the New Hampshire format, used by many
++phylogenetic analysis packages. It consists of a series of nested parentheses,
++describing the branching order, with the sequence names and branch lengths. It
++can be read by the NJPLOT program distributed with ClustalX. It can also be
++used by the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see
++the trees graphically. This is the same format used during multiple alignment
++for the guide trees. Some other packages that can read and display New
++Hampshire format are TreeTool, TreeView, and Phylowin.
++</P>
++<P>
++3) PHYLIP DISTANCE MATRIX. This format just outputs a matrix of all the
++pairwise distances in a format that can be used by the PHYLIP package. It used
++to be useful when one could not produce distances from protein sequences in the
++Phylip package but is now redundant (PROTDIST of Phylip 3.5 now does this).
++</P>
++<P>
++4) NEXUS FORMAT TREE. This format is used by several popular phylogeny programs,
++including PAUP and MacClade. The format is described fully in:
++Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
++NEXUS: an extensible file format for systematic information.
++Systematic Biology 46:590-621.
++</P>
++<P>
++BOOTSTRAP LABELS ON: By default, the bootstrap values are correctly placed on
++the tree branches of the phylip format output tree. The toggle allows them to
++be placed on the nodes, which is incorrect, but some display packages (e.g.
++TreeTool, TreeView and Phylowin) only support node labelling but not branch
++labelling. Care should be taken to note which branches and labels go together. 
++</P>
++<P>
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="C">                               Colors
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++Clustal X provides a versatile coloring scheme for the sequence alignment 
++display. The sequences (or profiles) are colored automatically, when they are
++loaded. Sequences can be colored either by assigning a color to specific
++residues, or on the basis of an alignment consensus. In the latter case, the
++alignment consensus is calculated automatically, and the residues in each
++column are colored according to the consensus character assigned to that
++column. In this way, you can choose to highlight, for example, conserved
++hydrophilic or hydrophobic positions in the alignment.
++</P>
++<P>
++The 'rules' used to color the alignment are specified in a COLOR PARAMETER
++FILE. Clustal X automatically looks for a file called 'colprot.par' for protein
++sequences or 'coldna.par' for DNA, in the current directory. (If you are running
++under UNIX, it then looks in your home directory, and finally in the
++directories in your PATH environment variable).
++</P>
++<P>
++By default, if no color parameter file is found, protein sequences are colored
++by residue as follows:
++</P>
++<P>
++<PRE>
++	Color			Residue Code
++</P>
++<P>
++	ORANGE			GPST
++	RED			HKR
++	BLUE			FWY
++	GREEN			ILMV
++</PRE>
++</P>
++<P>
++In the case of DNA sequences, the default colors are as follows:
++</P>
++<P>
++<PRE>
++	Color			Residue Code
++</P>
++<P>
++	ORANGE			A
++	RED			C
++	BLUE			T
++	GREEN			G
++</PRE>
++</P>
++<P>
++</P>
++<P>
++The default BACKGROUND COLORING option shows the sequence residues using a
++black character on a colored background. It can be switched off to show
++residues as a colored character on a white background. 
++</P>
++<P>
++Either BLACK AND WHITE or DEFAULT COLOR options can be selected. The Color
++option looks first for the color parameter file (as described above) and, if no
++file is found, uses the default residue-specific colors.
++</P>
++<P>
++You can specify your own coloring scheme by using the LOAD COLOR PARAMETER FILE
++option. The format of the color parameter file is described below.
++</P>
++<P>
++<H4>
++COLOR PARAMETER FILE
++</H4>
++</P>
++<P>
++This file is divided into 3 sections:
++</P>
++<P>
++1) the names and rgb values of the colors<BR>
++2) the rules for calculating the consensus<BR>
++3) the rules for assigning colors to the residues
++</P>
++<P> 
++An example file is given here.
++</P>
++<P>
++<PRE>
++ --------------------------------------------------------------------
++@rgbindex
++RED          0.9 0.1 0.1
++BLUE         0.1 0.1 0.9
++GREEN        0.1 0.9 0.1
++YELLOW       0.9 0.9 0.0
++</P>
++<P>
++@consensus
++% = 60% w:l:v:i:m:a:f:c:y:h:p
++# = 80% w:l:v:i:m:a:f:c:y:h:p
++- = 50% e:d
+++ = 60% k:r
++q = 50% q:e
++p = 50% p
++n = 50% n
++t = 50% t:s
++</P>
++<P>
++@color
++g = RED
++p = YELLOW
++t = GREEN if t:%:#
++n = GREEN if n
++w = BLUE if %:#:p
++k = RED if +
++ --------------------------------------------------------------------
++</PRE>
++</P>
++<P>
++The first section is optional and is identified by the header @rgbindex. If
++this section exists, each color used in the file must be named and the rgb
++values specified (on a scale from 0 to 1). If the rgb index section is not
++found, the following set of hard-coded colors will be used.
++</P>
++<P>
++<PRE>
++RED          0.9 0.1 0.1
++BLUE         0.1 0.1 0.9
++GREEN        0.1 0.9 0.1
++ORANGE       0.9 0.7 0.3
++CYAN         0.1 0.9 0.9
++PINK         0.9 0.5 0.5
++MAGENTA      0.9 0.1 0.9
++YELLOW       0.9 0.9 0.0
++</PRE>
++</P>
++<P>
++The second section is optional and is identified by the header @consensus. It
++defines how the consensus is calculated.
++</P>
++<P> 
++The format of each consensus parameter is:-
++</P>
++<P> 
++<PRE>
++c = n% residue_list
++</P>
++<P> 
++        where
++              c             is a character used to identify the parameter.
++              n             is an integer value used as the percentage cutoff
++                            point.
++              residue_list  is a list of residues denoted by a single
++                            character, delimited by a colon (:).
++</PRE>
++</P>
++<P> 
++For example:   # = 60% w:l:v:i
++</P>
++<P>
++will assign a consensus character # to any column in the alignment which
++contains more than 60% of the residues w,l,v and i.
++</P>
++<P>        
++</P>
++<P> 
++The third section is identified by the header @color, and defines how colors
++are assigned to each residue in the alignment.
++</P>
++<P> 
++The color parameters can take one of two formats:
++</P>
++<P>
++<PRE>
++1) r = color
++2) r = color if consensus_list
++</P>
++<P> 
++        where
++              r             is a character used to denote a residue.
++              color         is one of the colors in the GDE color lookup table.
++              consensus_list is a list of consensus characters, each denoted
++                            by a single character, delimited by a colon (:).
++</PRE>
++</P>
++<P> 
++Examples:
++1) g = ORANGE
++</P>
++<P>
++will color all glycines ORANGE, regardless of the consensus.
++</P>
++<P>
++2) w = BLUE if w:%:#
++</P>
++<P>
++will color BLUE any tryptophan which is found in a column with a consensus of
++w, % or #.
++</P>
++<P> 
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="Q">                       Alignment Quality Analysis
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++<H3>
++QUALITY SCORES
++</H3>
++</P>
++<P>
++Clustal X provides an indication of the quality of an alignment by plotting
++a 'conservation score' for each column of the alignment. A high score indicates
++a well-conserved column; a low score indicates low conservation. The quality
++curve is drawn below the alignment.
++</P>
++<P>
++Two methods are also provided to indicate single residues or sequence segments
++which score badly in the alignment.
++</P>
++<P> 
++Low-scoring residues are expected to occur at a moderate frequency in all the
++sequences because of their steady divergence due to the natural processes of
++evolution. The most divergent sequences are likely to have the most outliers.
++However, the highlighted residues are especially useful in pointing to
++sequence misalignments. Note that clustering of highlighted residues is a
++strong indication of misalignment. This can arise due to various reasons, for
++example:
++</P>
++<P> 
++        1. Partial or total misalignments caused by a failure in the
++        alignment algorithm. Usually only in difficult alignment cases.
++</P>
++<P> 
++        2. Partial or total misalignments because at least one of the
++        sequences in the given set is partly or completely unrelated to the
++        other sequences. It is up to the user to check that the set of
++        sequences is alignable.
++</P>
++<P>
++        3. Frameshift translation errors in a protein sequence causing local
++        mismatched regions to be heavily highlighted. These are surprisingly
++        common in database entries. If suspected, a 3-frame translation of
++        the source DNA needs to be examined.
++</P>
++<P> 
++Occasionally, highlighted residues may point to regions of some biological
++significance. This might happen for example if a protein alignment contains a
++sequence which has acquired new functions relative to the main sequence set. It
++is important to exclude other explanations, such as error or the natural
++divergence of sequences, before invoking a biological explanation.
++</P>
++<P>
++</P>
++<P>
++<H3>
++LOW-SCORING SEGMENTS
++</H3>
++</P>
++<P>
++Unreliable regions in the alignment can be highlighted using the Low-Scoring
++Segments option. A sequence-weighted profile is used to indicate any segments
++in the sequences which score badly. Because the profile calculation may take
++some time, an option is provided to calculate LOW-SCORING SEGMENTS. The 
++segment display can then be toggled on or off without having to repeat the
++time-consuming calculations.
++</P>
++<P>
++For details of the low-scoring segment calculation, see the CALCULATION section
++below.
++</P>
++<P>
++</P>
++<P>
++<H4>
++LOW-SCORING SEGMENT PARAMETERS
++</H4>
++</P>
++<P>
++MINIMUM LENGTH OF SEGMENTS: short segments (or even single residues) can be
++hidden by increasing the minimum length of segments which will be displayed.
++</P>
++<P>
++DNA MARKING SCALE is used to remove less significant segments from the 
++highlighted display. Increase the scale to display more segments; decrease the
++scale to remove the least significant.
++</P>
++<P>
++</P>
++<P>
++PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of each
++amino acid to each other. The matrix is used to calculate the
++sequence-weighted profile scores. There are four 'in-built' Log-Odds matrices offered:
++the Gonnet PAM 80, 120, 250, 350 matrices. A more stringent matrix which only
++gives a high score to identities and the most favoured conservative
++substitutions, may be more suitable when the sequences are closely related. For
++more divergent sequences, it is appropriate to use "softer" matrices which give
++a high score to many other frequent substitutions. This  option automatically
++recalculates the low-scoring segments.
++</P>
++<P>
++</P>
++<P>
++DNA WEIGHT MATRIX: Two hard-coded matrices are available:
++</P>
++<P>
++1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
++of nucleic acid sequences. X's and N's are treated as matches to any IUB
++ambiguity symbol. All matches score 1.0; all mismatches for IUB symbols score
++0.9.
++</P>
++<P>
++2) CLUSTALW(1.6). The previous system used by ClustalW, in which matches score
++1.0 and mismatches score 0. All matches for IUB symbols also score 0. 
++</P>
++<P>
++A new matrix can be read from a file on disk, if the filename consists only
++of lower case characters. The values in the new weight matrix should be
++similarities and should be NEGATIVE for infrequent substitutions.
++</P>
++<P> 
++INPUT FORMAT. The format used for a new matrix is the same as the BLAST
++program. Any lines beginning with a # character are assumed to be comments. The
++first non-comment line should contain a list of amino acids in any order, using
++the 1 letter code, followed by a * character. This should be followed by a
++square matrix of scores, with one row and one column for each amino acid. The
++last row and column of the matrix (corresponding to the * character) contain
++the minimum score over the whole matrix.
++</P>
++<P>
++<H4>
++QUALITY SCORE PARAMETERS
++</H4>
++</P>
++<P>
++You can customise the column 'quality scores' plotted underneath the alignment
++display using the following options.
++</P>
++<P>
++SCORE PLOT SCALE: this is a scalar value from 1 to 10, which can be used to
++change the scale of the quality score plot. 
++</P>
++<P>
++RESIDUE EXCEPTION CUTOFF: this is a scalar value from 1 to 10, which can be
++used to change the number of residue exceptions which are highlighted in the
++alignment display. (For an explanation of this cutoff, see the CALCULATION OF
++RESIDUE EXCEPTIONS section below.)
++</P>
++<P>
++PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of 
++each amino acid to each other. 
++</P>
++<P> 
++DNA WEIGHT MATRIX: two hard-coded matrices are available: IUB and CLUSTALW(1.6).
++</P>
++<P>
++For more information about the weight matrices, see the help above for
++the Low-scoring Segments Weight Matrix.
++</P>
++<P>
++For details of the quality score calculations, see the CALCULATION section
++below.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++SHOW LOW-SCORING SEGMENTS
++</STRONG>
++</P>
++<P>                       
++The low-scoring segment display can be toggled on or off. This option does not
++recalculate the profile scores.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++SHOW EXCEPTIONAL RESIDUES
++</STRONG>
++</P>
++<P>                       
++This option highlights individual residues which score badly in the alignment
++quality calculations. Residues which score exceptionally low are highlighted by
++using a white character on a grey background.
++</P>
++<P>
++<STRONG>
++SAVE QUALITY SCORES TO FILE
++</STRONG>
++</P>
++<P>
++The quality scores that are plotted underneath the alignment display can also
++be saved in a text file. Each column in the alignment is written on one line in
++the output file, with the value of the quality score at the end of the line.
++Only the sequences currently selected in the display are written to the file.
++One use for quality scores is to color residues in a protein structure by
++sequence conservation. In this way conserved surface residues can be
++highlighted to locate functional regions such as ligand-binding sites.
++</P>
++<P>
++</P>
++<P>
++<H3>
++CALCULATION OF QUALITY SCORES
++</H3>
++</P>
++<P>
++Suppose we have an alignment of m sequences of length n. Then, the alignment
++can be written as:
++</P>
++<P>
++<PRE>
++        A11 A12 A13 .......... A1n
++        A21 A22 A23 .......... A2n
++        .
++        .
++        Am1 Am2 Am3 .......... Amn
++</PRE>
++</P>
++<P>
++We also have a residue comparison matrix of size R where C(i,j) is the score
++for aligning residue i with residue j.
++</P>
++<P>
++We want to calculate a score for the conservation of the jth position in the
++alignment.
++</P>
++<P>
++To do this, we define an R-dimensional sequence space. For the jth position in 
++the alignment, each sequence consists of a single residue which is assigned a
++point S in the space. S has R dimensions, and for sequence i, the rth dimension
++is defined as:
++</P>
++<P>
++<PRE>
++	Sr =    C(r,Aij)
++</PRE>
++</P>
++<P>
++We then calculate a consensus value for the jth position in the alignment. This
++value X also has R dimensions, and the rth dimension is defined as:
++</P>
++<P>
++<PRE>
++	Xr = (   SUM   (Fij * C(i,r)) ) / m
++               1<=i<=R
++</PRE>
++</P>
++<P>
++where Fij is the count of residues i at position j in the alignment.
++</P>
++<P>
++Now we can calculate the distance Di between each sequence i and the consensus 
++position X in the R-dimensional space.
++</P>
++<P>
++<PRE>
++	Di = SQRT   (   SUM   (Xr - Sr)(Xr - Sr) )
++                      1<=r<=R
++</PRE>
++</P>
++<P>
++</P>
++<P>
++The quality score for the jth position in the alignment is defined as the mean
++of the sequence distances Di.
++</P>
++<P>
++The score is normalised by multiplying by the percentage of sequences which
++have residues (and not gaps) at this position.
++</P>
++<P>
++<H3>
++CALCULATION OF RESIDUE EXCEPTIONS
++</H3>
++</P>
++<P>
++The jth residue of the ith sequence is considered as an exception if the
++distance Di of the sequence from the consensus value X is greater than (Upper
++Quartile + Inter Quartile Range * Cutoff). The value used as a cutoff for
++displaying exceptions can be set from the SCORE PARAMETERS menu. A high cutoff
++value will only display very significant exceptions; a low value will allow
++more, less significant, exceptions to be highlighted.
++</P>
++<P>
++(NB. Sequences which contain gaps at this position are not included in the
++exception calculation.)
++</P>
++<P>
++</P>
++<P>
++<H3>
++CALCULATION OF LOW-SCORING SEGMENTS
++</H3>
++</P>
++<P>
++Suppose we have an alignment of m sequences of length n. Then, the alignment
++can be written as:
++</P>
++<P>
++<PRE>
++        A11 A12 A13 .......... A1n
++        A21 A22 A23 .......... A2n
++        .
++        .
++        Am1 Am2 Am3 .......... Amn
++</PRE>
++</P>
++<P>
++We also have a residue comparison matrix of size R where C(i,j) is the score
++for aligning residue i with residue j.
++</P>
++<P>
++We calculate sequence weights by building a neighbour-joining tree, in which
++branch lengths are proportional to divergence. Summing the branches by branch
++ownership provides the weights. See Thompson et al., CABIOS, 10, 19 (1994) and
++Henikoff et al., JMB, 243, 574 (1994).
++</P>
++<P>
++To find the low-scoring segments in a sequence Si, we build a weighted profile
++of the remaining sequences in the alignment. Suppose we find residue r at 
++position j in the sequence; then the score for the jth position in the sequence
++is defined as
++</P>
++<P>
++<PRE>
++	Score(Si,j) = Profile(j,r)   where Profile(j,r) is the profile score
++                                       for residue r at position j in the
++                                       alignment.
++</PRE>
++</P>
++<P>
++These residue scores are summed along the sequence in both forward and backward
++directions. If the sum of the scores is positive, then it is reset to zero.
++Segments which score negatively in both directions are considered as 
++'low-scoring' and will be highlighted in the alignment display.
++</P>
++<P>
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="9">              Command Line Parameters
++</A></H2></CENTER>
++<CENTER><H3>                DATA (sequences)
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-PROFILE1=file.ext  and  -PROFILE2=file.ext  </TT></TD>
++<TD><EM>profiles (aligned sequences)</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>                VERBS (do things)
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-HELP  or -CHECK    </TT></TD>
++<TD><EM>outline the command line parameters</EM></TD>
++</TR>
++<TR>
++<TD><TT>-ALIGN              </TT></TD>
++<TD><EM>do full multiple alignment </EM></TD>
++</TR>
++<TR>
++<TD><TT>-TREE               </TT></TD>
++<TD><EM>calculate NJ tree</EM></TD>
++</TR>
++<TR>
++<TD><TT>-BOOTSTRAP(=n)      </TT></TD>
++<TD><EM>bootstrap a NJ tree (n= number of bootstraps; def. = 1000)</EM></TD>
++</TR>
++<TR>
++<TD><TT>-CONVERT            </TT></TD>
++<TD><EM>output the input sequences in a different file format</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>                PARAMETERS (set things)
++</H3></CENTER>
++<CENTER><P><STRONG>***General settings:***
++</STRONG></P></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-INTERACTIVE </TT></TD>
++<TD><EM>read command line, then enter normal interactive menus</EM></TD>
++</TR>
++<TR>
++<TD><TT>-QUICKTREE   </TT></TD>
++<TD><EM>use FAST algorithm for the alignment guide tree</EM></TD>
++</TR>
++<TR>
++<TD><TT>-TYPE=       </TT></TD>
++<TD><EM>PROTEIN or DNA sequences</EM></TD>
++</TR>
++<TR>
++<TD><TT>-NEGATIVE    </TT></TD>
++<TD><EM>protein alignment with negative values in matrix</EM></TD>
++</TR>
++<TR>
++<TD><TT>-OUTFILE=    </TT></TD>
++<TD><EM>sequence alignment file name</EM></TD>
++</TR>
++<TR>
++<TD><TT>-OUTPUT=     </TT></TD>
++<TD><EM>GCG, GDE, PHYLIP, PIR or NEXUS</EM></TD>
++</TR>
++<TR>
++<TD><TT>-OUTORDER=   </TT></TD>
++<TD><EM>INPUT or ALIGNED</EM></TD>
++</TR>
++<TR>
++<TD><TT>-CASE=       </TT></TD>
++<TD><EM>LOWER or UPPER (for GDE output only)</EM></TD>
++</TR>
++<TR>
++<TD><TT>-SEQNOS=     </TT></TD>
++<TD><EM>OFF or ON (for Clustal output only)</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Fast Pairwise Alignments:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-TOPDIAGS=n  </TT></TD>
++<TD><EM>number of best diags.</EM></TD>
++</TR>
++<TR>
++<TD><TT>-WINDOW=n    </TT></TD>
++<TD><EM>window around best diags.</EM></TD>
++</TR>
++<TR>
++<TD><TT>-PAIRGAP=n   </TT></TD>
++<TD><EM>gap penalty</EM></TD>
++</TR>
++<TR>
++<TD><TT>-SCORE=      </TT></TD>
++<TD><EM>PERCENT or ABSOLUTE</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Slow Pairwise Alignments:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-PWDNAMATRIX= </TT></TD>
++<TD><EM>DNA weight matrix=IUB, CLUSTALW or filename</EM></TD>
++</TR>
++<TR>
++<TD><TT>-PWGAPOPEN=f  </TT></TD>
++<TD><EM>gap opening penalty</EM></TD>
++</TR>
++<TR>
++<TD><TT>-PWGAPEXT=f  </TT></TD>
++<TD><EM>gap extension penalty</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Multiple Alignments:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-USETREE=    </TT></TD>
++<TD><EM>file for old guide tree</EM></TD>
++</TR>
++<TR>
++<TD><TT>-MATRIX=     </TT></TD>
++<TD><EM>Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename</EM></TD>
++</TR>
++<TR>
++<TD><TT>-DNAMATRIX=  </TT></TD>
++<TD><EM>DNA weight matrix=IUB, CLUSTALW or filename</EM></TD>
++</TR>
++<TR>
++<TD><TT>-GAPOPEN=f   </TT></TD>
++<TD><EM>gap opening penalty</EM></TD>
++</TR>
++<TR>
++<TD><TT>-GAPEXT=f  </TT></TD>
++<TD><EM>gap extension penalty</EM></TD>
++</TR>
++<TR>
++<TD><TT>-ENDGAPS     </TT></TD>
++<TD><EM>no end gap separation pen.</EM></TD>
++</TR>
++<TR>
++<TD><TT>-GAPDIST=n   </TT></TD>
++<TD><EM>gap separation pen. range</EM></TD>
++</TR>
++<TR>
++<TD><TT>-NOPGAP      </TT></TD>
++<TD><EM>residue-specific gaps off</EM></TD>
++</TR>
++<TR>
++<TD><TT>-NOHGAP    </TT></TD>
++<TD><EM>hydrophilic gaps off</EM></TD>
++</TR>
++<TR>
++<TD><TT>-HGAPRESIDUES= </TT></TD>
++<TD><EM>list hydrophilic res.</EM></TD>
++</TR>
++<TR>
++<TD><TT>-MAXDIV=n    </TT></TD>
++<TD><EM>% ident. for delay</EM></TD>
++</TR>
++<TR>
++<TD><TT>-TYPE=       </TT></TD>
++<TD><EM>PROTEIN or DNA</EM></TD>
++</TR>
++<TR>
++<TD><TT>-TRANSWEIGHT=f </TT></TD>
++<TD><EM>transitions weighting</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Profile Alignments:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-NEWTREE1=    </TT></TD>
++<TD><EM>file for new guide tree for profile1</EM></TD>
++</TR>
++<TR>
++<TD><TT>-NEWTREE2=    </TT></TD>
++<TD><EM>file for new guide tree for profile2</EM></TD>
++</TR>
++<TR>
++<TD><TT>-USETREE1=    </TT></TD>
++<TD><EM>file for old guide tree for profile1</EM></TD>
++</TR>
++<TR>
++<TD><TT>-USETREE2=    </TT></TD>
++<TD><EM>file for old guide tree for profile2</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Sequence to Profile Alignments:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-NEWTREE=    </TT></TD>
++<TD><EM>file for new guide tree</EM></TD>
++</TR>
++<TR>
++<TD><TT>-USETREE=    </TT></TD>
++<TD><EM>file for old guide tree</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Structure Alignments:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-NOSECSTR2     </TT></TD>
++<TD><EM>do not use secondary structure/gap penalty mask for profile 2</EM></TD>
++</TR>
++<TR>
++<TD><TT>-SECSTROUT=STRUCTURE or MASK or BOTH or NONE  </TT></TD>
++<TD><EM>output in alignment file</EM></TD>
++</TR>
++<TR>
++<TD><TT>-HELIXGAP=n    </TT></TD>
++<TD><EM>gap penalty for helix core residues </EM></TD>
++</TR>
++<TR>
++<TD><TT>-STRANDGAP=n   </TT></TD>
++<TD><EM>gap penalty for strand core residues</EM></TD>
++</TR>
++<TR>
++<TD><TT>-LOOPGAP=n     </TT></TD>
++<TD><EM>gap penalty for loop regions</EM></TD>
++</TR>
++<TR>
++<TD><TT>-TERMINALGAP=n </TT></TD>
++<TD><EM>gap penalty for structure termini</EM></TD>
++</TR>
++<TR>
++<TD><TT>-HELIXENDIN=n  </TT></TD>
++<TD><EM>number of residues inside helix to be treated as terminal</EM></TD>
++</TR>
++<TR>
++<TD><TT>-HELIXENDOUT=n </TT></TD>
++<TD><EM>number of residues outside helix to be treated as terminal</EM></TD>
++</TR>
++<TR>
++<TD><TT>-STRANDENDIN=n </TT></TD>
++<TD><EM>number of residues inside strand to be treated as terminal</EM></TD>
++</TR>
++<TR>
++<TD><TT>-STRANDENDOUT=n</TT></TD>
++<TD><EM>number of residues outside strand to be treated as terminal </EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Trees:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-SEED=n    </TT></TD>
++<TD><EM>seed number for bootstraps</EM></TD>
++</TR>
++<TR>
++<TD><TT>-KIMURA      </TT></TD>
++<TD><EM>use Kimura's correction</EM></TD>
++</TR>
++<TR>
++<TD><TT>-TOSSGAPS  </TT></TD>
++<TD><EM>ignore positions with gaps</EM></TD>
++</TR>
++<TR>
++<TD><TT>-BOOTLABELS=node OR branch </TT></TD>
++<TD><EM>position of bootstrap values in tree display</EM></TD>
++</TR>
++</TABLE></CENTER>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="R">                             References
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++<STRONG>
++The ClustalX program is described in the manuscript:
++</STRONG>
++</P>
++<P>
++Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
++The ClustalX windows interface: flexible strategies for multiple sequence 
+ alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
+ </P>
+ <P>
--- clustalx-1.83.orig/debian/patches/trees.c.patch
+++ clustalx-1.83/debian/patches/trees.c.patch
@@ -0,0 +1,13 @@
+Index: clustalw-1.83/trees.c
+===================================================================
+--- clustalw-1.83.orig/trees.c
++++ clustalw-1.83/trees.c
+@@ -1497,7 +1497,7 @@
+ 		fprintf(stdout,"\n           or 3) use the PHYLIP package.");
+ 		fprintf(stdout,"\n\n");
+ 		if (usemenu) 
+-			getstr("Press [RETURN] to continue",dummy);
++			getstr("Press [RETURN] to continue",10,dummy);
+ 	}
+ 
+ 
--- clustalx-1.83.orig/debian/patches/series
+++ clustalx-1.83/debian/patches/series
@@ -0,0 +1,11 @@
+amenu.c.patch
+clustal-help.patch
+clustalw.h.patch
+clustalx.html.patch
+interface.c.patch
+sequence.c.patch
+trees.c.patch
+util.c.patch
+makefile.patch
+clustalx_help.patch
+xmenu.c.patch
--- clustalx-1.83.orig/debian/patches/clustalx_help.patch
+++ clustalx-1.83/debian/patches/clustalx_help.patch
@@ -0,0 +1,1529 @@
+Index: clustalw-1.83/clustalx_help
+===================================================================
+--- clustalw-1.83.orig/clustalx_help
++++ clustalw-1.83/clustalx_help
+@@ -1,4 +1,1524 @@
+ 
++This is the on-line help file for Clustal X (version 1.81), using the NCBI
++Vibrant Toolkit.   
++
++It should be named or defined as: clustalx_help 
++except with MSDOS in which case it should be named ClustalX.HLP
++
++For full details of usage and algorithms, please read the CLUSTALW.DOC file.
++
++
++Toby  Gibson                         EMBL, Heidelberg, Germany.
++Des   Higgins                        UCC, Cork, Ireland.
++Julie Thompson/Francois Jeanmougin   IGBMC, Strasbourg, France.
++
++
++
++
++>>HELP G <<
++                      General help for CLUSTAL X (1.8)
++
++Clustal X is a windows interface for the ClustalW multiple sequence alignment
++program. It provides an integrated environment for performing multiple sequence
++and profile alignments and analysing the results. The sequence alignment is
++displayed in a window on the screen. A versatile coloring scheme has been
++incorporated allowing you to highlight conserved features  in the alignment.
++The pull-down menus at the top of the window allow you to select all the
++options required for traditional multiple sequence and profile alignment.
++
++You can cut-and-paste sequences to change the order of the alignment; you can
++select a subset of sequences to be aligned; you can select a sub-range of the
++alignment to be realigned and inserted back into the original alignment.
++
++Alignment quality analysis can be performed and low-scoring segments or
++exceptional residues can be highlighted.
++
++ClustalX is available for a number of different platforms including: SUN
++Solaris, IRIX5.3 on Silicon Graphics, Digital UNIX on DECStations, Microsoft
++Windows (32 bit) for PC's, Linux ELF for x86 PC's and Macintosh PowerMac. (See
++the README file for Installation instructions.)
++
++
++<H4>
++SEQUENCE INPUT
++</H4>
++
++Sequences and profiles (a term for pre-existing alignments) are input using 
++the FILE menu. Invalid options will be disabled. All sequences must be contained
++in 1 file. 7 formats are automatically recognised: NBRF/PIR, EMBL/SWISSPROT,
++Pearson (Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9 RSF and GDE flat file.
++All non-alphabetic characters (spaces, digits, punctuation marks) are ignored
++except "-" which is used to indicate a GAP ("." in MSF/RSF).  
++
++<H4>
++SEQUENCE / PROFILE ALIGNMENTS
++</H4>
++
++Clustal X has two modes which can be selected using the switch directly above
++the sequence display: MULTIPLE ALIGNMENT MODE and PROFILE ALIGNMENT MODE.
++
++To do a MULTIPLE ALIGNMENT on a set of sequences, make sure MULTIPLE ALIGNMENT
++MODE is selected. A single sequence data area is then displayed. The ALIGNMENT
++menu then allows you to either produce a guide tree for the alignment, or to do
++a multiple alignment following the guide tree, or to do a full multiple
++alignment.
++
++In PROFILE ALIGNMENT MODE, two sequence data areas are displayed, allowing you
++to align 2 alignments (termed profiles). Profiles are also used to add a new
++sequence to an old alignment, or to use secondary structure to guide the
++alignment process. GAPS in the old alignments are indicated using the "-" 
++character. PROFILES can be input in ANY of the allowed formats; just  use "-"
++(or "." for MSF/RSF) for each gap position. In Profile Alignment Mode, a button
++"Lock Scroll" is displayed which allows you to scroll the two profiles together
++using a single scroll bar. When the Lock Scroll is turned off, the two profiles
++can be scrolled independently.
++
++<H4>
++PHYLOGENETIC TREES
++</H4>
++
++Phylogenetic trees can be calculated from old alignments (read in with "-"
++characters to indicate gaps) OR after a multiple alignment while the alignment
++is still displayed.
++
++<H4>
++ALIGNMENT DISPLAY
++</H4>
++
++The alignment is displayed on the screen with the sequence names on the left
++hand side. The sequence alignment is for display only, it cannot be edited here
++(except for changing the sequence order by cutting-and-pasting on the sequence
++names). 
++
++A ruler is displayed below the sequences, starting at 1 for the first residue
++position (residue numbers in the sequence input file are ignored).
++
++A line above the alignment is used to mark strongly conserved positions. Three
++characters ('*', ':' and '.') are used:
++
++'*' indicates positions which have a single, fully conserved residue
++
++':' indicates that one of the following 'strong' groups is fully conserved:-
++<PRE>
++                 STA  
++                 NEQK  
++                 NHQK  
++                 NDEQ  
++                 QHRK  
++                 MILV  
++                 MILF  
++                 HY  
++                 FYW  
++</PRE>
++
++'.' indicates that one of the following 'weaker' groups is fully conserved:-
++<PRE>
++                 CSA  
++                 ATV  
++                 SAG  
++                 STNK  
++                 STPA  
++                 SGND  
++                 SNDEQK  
++                 NDEQHK  
++                 NEQHRK  
++                 FVLIM  
++                 HFY  
++</PRE>
++
++These are all the positively scoring groups that occur in the Gonnet Pam250
++matrix. The strong and weak groups are defined as strong score >0.5 and weak
++score <=0.5 respectively.
++
++For profile alignments, secondary structure and gap penalty masks are displayed
++above the sequences, if any data is found in the profile input file.
++
++
++>>HELP F <<
++                      Input / Output Files 
++
++LOAD SEQUENCES reads sequences from one of 7 file formats, replacing any
++sequences that are already loaded. All sequences must be in 1 file. The formats
++that are automatically recognised are: NBRF/PIR, EMBL/SWISSPROT, Pearson
++(Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9/RSF and GDE flat file.  All
++non-alphabetic characters (spaces, digits, punctuation  marks) are ignored
++except "-" which is used to indicate a GAP ("." in MSF/RSF).
++
++The program tries to automatically recognise the different file formats used
++and to guess whether the sequences are amino acid or nucleotide.  This is not
++always foolproof.
++
++FASTA and NBRF/PIR formats are recognised by having a ">" as the first 
++character in the file.  
++
++EMBL/Swiss Prot formats are recognised by the letters "ID" at the start of the
++file (the token for the entry name field).  
++
++CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.
++
++GCG/MSF format is recognised by one of the following:
++<UL>
++<LI>
++       - the word PileUp at the start of the file.
++</LI><LI>
++       - the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
++         at the start of the file.
++</LI><LI>
++       - the word MSF on the first line of the file, and the characters ..
++         at the end of this line.
++</LI>
++</UL>
++ 
++GCG/RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of
++the file.
++
++
++If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the
++sequence will be assumed to be nucleotide.  This works in 97.3% of cases but
++watch out!
++
++APPEND SEQUENCES is only valid in MULTIPLE ALIGNMENT MODE. The input sequences
++do not replace those already loaded, but are appended at the end of the
++alignment.
++
++SAVE SEQUENCES AS... offers the user a choice of one of six output formats:
++CLUSTAL, NBRF/PIR, GCG/MSF, PHYLIP, NEXUS or GDE. All sequences are written
++to a single file. Options are available to save a range of the alignment, 
++switch between UPPER/LOWER case for GDE files, and to output SEQUENCE NUMBERING
++for CLUSTAL files.
++
++LOAD PROFILE 1 reads sequences in the same 7 file formats, replacing any
++sequences already loaded as Profile 1. This option will also remove any
++sequences which are loaded in Profile 2.
++
++LOAD PROFILE 2 reads sequences in the same 7 file formats, replacing any
++sequences already loaded as Profile 2.
++
++SAVE PROFILE 1 AS... is similar to the Save Sequences option except that only
++those sequences in Profile 1 will be written to the output file.
++
++SAVE PROFILE 2 AS... is similar to the Save Sequences option except that only
++those sequences in Profile 2 will be written to the output file.
++
++WRITE ALIGNMENT AS POSTSCRIPT will write the sequence display to a postscript
++format file. This will include any secondary structure / gap penalty mask 
++information and the consensus and ruler lines which are displayed on the
++screen. The Alignment Quality curve can be optionally included in the output
++file.
++
++WRITE PROFILE 1 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
++except that only the profile 1 display will be printed.
++
++WRITE PROFILE 2 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
++except that only the profile 2 display will be printed.
++
++
++<H4>
++POSTSCRIPT PARAMETERS
++</H4>
++
++A number of options are available to allow you to configure your postscript
++output file.
++
++PS COLORS FILE:
++
++The exact RGB values required to reproduce the colors used in the alignment
++window will vary from printer to printer. A PS colors file can be specified
++that contains the RGB values for all the colors required by each of your
++postscript printers.
++
++By default, Clustal X looks for a file called 'colprint.par' in the current
++directory (if you're running under UNIX, it then looks in your home directory,
++and finally in the directories in your PATH environment variable). If no PS
++colors file is found or a color used on the screen is not defined here, the
++screen RGB values (from the Color Parameter File) are used.
++
++The PS colors file consists of one line for each color to be defined, with the
++color name followed by the RGB values (on a scale of 0 to 1). For example,
++
++RED          0.9 0.1 0.1
++
++Blank lines and comments (lines beginning with a '#' character) are ignored.
++
++
++PAGE SIZE:  The alignment can be displayed on either A4, A3 or US Letter size
++pages.
++
++ORIENTATION: The alignment can be displayed on either a landscape or portrait
++page.
++
++PRINT HEADER: An optional header including the postscript filename, and
++creation date can be printed at the top of each page.
++
++PRINT QUALITY CURVE: The Alignment Quality curve which is displayed underneath
++the alignment on the screen can be included in the postscript output.
++
++PRINT RULER: The ruler which is displayed underneath the alignment on the 
++screen can be included in the postscript output.
++
++PRINT RESIDUE NUMBERS: Sequence residue numbers can be printed at the right
++hand side of the alignment.
++
++RESIZE TO FIT PAGE: By default, the alignment is scaled to fit the page size
++selected. This option can be turned off, in which case a font size of 10 will
++be used for the sequences.
++
++PRINT FROM POSITION/TO: A range of the alignment can be printed. The default
++is to print the full alignment. The first and last residues to be printed are
++specified here.
++
++USE BLOCK LENGTH: The alignment can be divided into blocks of residues. The
++number of residues in a block is specified here. More than one block may then
++be printed on a single page. This is useful for long alignments of a small
++number of sequences. If the block length is set to 0, the alignment will not
++be divided into blocks, but printed across a number of pages.
++
++>>HELP E <<
++                          Editing Alignments
++
++Clustal X allows you to change the order of the sequences in the alignment, by
++cutting-and-pasting the sequence names.
++
++To select a group of sequences to be moved, click on a sequence name and drag
++the cursor until all the required sequences are highlighted. Holding down the
++Shift key when clicking on the first name will add new sequences to those
++already selected.
++
++(Options are provided to Select All Sequences, Select Profile 1 or Select 
++Profile 2.)
++
++The selected sequences can be removed from the alignment by using the EDIT
++menu, CUT option.
++
++To add the cut sequences back into an alignment, select a sequence by clicking
++on the sequence name. The cut sequences will be added to the alignment,
++immediately following the selected sequence, by the EDIT menu, PASTE option.
++
++To add the cut sequences to an empty alignment (eg. when cutting sequences from
++Profile 1 and pasting them to Profile 2), click on the empty sequence name
++display area, and select the EDIT menu, PASTE option as before.
++
++The sequence selection and sequence range selection can be cleared using the
++EDIT menu, CLEAR SEQUENCE SELECTION and CLEAR RANGE SELECTION options
++respectively.
++
++To search for a string of residues in the sequences, select the sequences to be
++searched by clicking on the sequence names. You can then enter the string to
++search for by selecting the SEARCH FOR STRING option. If the string is found in
++any of the sequences selected, the sequence name and column number are printed
++below the sequence display.
++
++In PROFILE ALIGNMENT MODE, the two profiles can be merged (normally done after
++alignment) by selecting ADD PROFILE 2 TO PROFILE 1. The sequences currently
++displayed as Profile 2 will be appended to Profile 1. 
++
++The REMOVE ALL GAPS option will remove all gaps from the sequences currently
++selected.
++WARNING: This option removes ALL gaps, not only those introduced by ClustalX,
++but also those that were read from the input alignment file. Any secondary
++structure information associated with the alignment will NOT be automatically
++realigned.
++
++The REMOVE GAP-ONLY COLUMNS option will remove those positions in the alignment which
++contain gaps in all sequences. This can occur as a result of removing divergent
++sequences from an alignment, or if an alignment has been realigned.
++
++>>HELP M <<
++                          Multiple Alignments
++
++Make sure MULTIPLE ALIGNMENT MODE is selected, using the switch directly above
++the sequence display area. Then, use the ALIGNMENT menu to do multiple
++alignments.
++
++Multiple alignments are carried out in 3 stages:
++ 
++1) all sequences are compared to each other (pairwise alignments);
++ 
++2) a dendrogram (like a phylogenetic tree) is constructed, describing the
++approximate groupings of the sequences by similarity (stored in a file).
++ 
++3) the final multiple alignment is carried out, using the dendrogram as a guide.
++
++The 3 stages are carried out automatically by the DO COMPLETE ALIGNMENT option.
++You can skip the first stages (pairwise alignments; guide tree) by using an old
++guide tree file (DO ALIGNMENT FROM GUIDE TREE); or you can just produce the
++guide tree with no final multiple alignment (PRODUCE GUIDE TREE ONLY).
++
++
++REALIGN SELECTED SEQUENCES is used to realign badly aligned sequences in the
++alignment. Sequences can be selected by clicking on the sequence names - see
++Editing Alignments for more details. The unselected sequences are then 'fixed'
++and a profile is made including only the unselected sequences. Each of the
++selected sequences in turn is then realigned to this profile. The realigned
++sequences will be displayed as a group at the end of the alignment.
++
++
++REALIGN SELECTED SEQUENCE RANGE is used to realign a small region of the 
++alignment. A residue range can be selected by clicking on the sequence display
++area. A multiple alignment is then performed, following the 3 stages described
++above, but only using the selected residue range. Finally the new alignment of
++the range is pasted back into the full sequence alignment.
++
++By default, gap penalties are used at each end of the subrange in order to 
++penalise terminal gaps. If the REALIGN SEGMENT END GAP PENALTIES option is
++switched off, gaps can be introduced at the ends of the residue range at no
++cost.
++
++
++ALIGNMENT PARAMETERS displays a sub-menu with the following options:
++
++RESET NEW GAPS BEFORE ALIGNMENT will remove any new gaps introduced into the
++sequences during multiple alignment if you wish to change the parameters and
++try again. This only takes effect just before you do a second multiple
++alignment. You can make phylogenetic trees after alignment whether or not this
++is ON. If you turn this OFF, the new gaps are kept even if you do a second
++multiple alignment. This allows you to iterate the alignment gradually.
++Sometimes, the alignment is improved by a second or third pass.
++
++RESET ALL GAPS BEFORE ALIGNMENT will remove all gaps in the sequences including
++gaps which were read in from the sequence input file. This only takes effect
++just before you do a second multiple alignment.  You can make phylogenetic
++trees after alignment whether or not this is ON.  If you turn this OFF, all
++gaps are kept even if you do a second multiple alignment. This allows you to
++iterate the alignment gradually.  Sometimes, the alignment is improved by a
++second or third pass.
++
++
++PAIRWISE ALIGNMENT PARAMETERS control the speed/sensitivity of the initial
++alignments.
++
++MULTIPLE ALIGNMENT PARAMETERS control the gaps in the final multiple
++alignments.
++
++PROTEIN GAP PARAMETERS displays a temporary window which allows you to set
++various parameters only used in the alignment of protein sequences.
++
++(SECONDARY STRUCTURE PARAMETERS, for use with the Profile Alignment Mode only,
++allows you to set various parameters only used with gap penalty masks.)
++
++SAVE LOG FILE will write the alignment calculation scores to a file. The log
++filename is the same as the input sequence filename, with an extension .log
++appended.
++
++
++<H4>
++OUTPUT FORMAT OPTIONS
++</H4>
++
++You can choose from 6 different alignment formats (CLUSTAL, GCG, NBRF/PIR,
++PHYLIP, GDE and NEXUS).  You can choose more than one (or all 6 if you wish).  
++
++CLUSTAL format output is a self explanatory alignment format. It shows the
++sequences aligned in blocks. It can be read in again at a later date to (for
++example) calculate a phylogenetic tree or add in new sequences by profile
++alignment.
++
++GCG output can be used by any of the GCG programs that can work on multiple
++alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN). It is the same as the GCG
++.msf format files (multiple sequence file); new in version 7 of GCG.
++
++NEXUS format is used by several phylogeny programs, including PAUP and
++MacClade.
++
++PHYLIP format output can be used for input to the PHYLIP package of Joe 
++Felsenstein.  This is a very widely used package for doing every imaginable
++form of phylogenetic analysis (MUCH more than the modest introduction
++offered by this program).
++
++NBRF/PIR: this is the same as the standard PIR format with ONE ADDITION. Gap
++characters "-" are used to indicate the positions of gaps in the multiple 
++alignment. These files can be re-used as input in any part of clustal that
++allows sequences (or alignments or profiles) to be read in.  
++
++GDE:  this format is used by the GDE package of Steven Smith and is understood
++by SEQLAB in GCG 9 or later.
++
++GDE OUTPUT CASE: sequences in GDE format may be written in either upper or
++lower case.
++ 
++CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the
++alignment lines in clustalw format.
++
++OUTPUT ORDER is used to control the order of the sequences in the output
++alignments. By default, it uses the order in which the sequences were aligned
++(from the guide tree/dendrogram), thus automatically grouping closely related
++sequences. It can be switched to be the same as the original input order.
++
++PARAMETER OUTPUT: This option will save all your parameter settings in a
++parameter file (suffix .par) during alignment. The file can be subsequently
++used to rerun ClustalW using the same parameters.
++
++
++<H3>
++ALIGNMENT PARAMETERS
++</H3>
++--------------------
++
++<STRONG>
++PAIRWISE ALIGNMENT PARAMETERS
++</STRONG>
++
++A distance is calculated between every pair of sequences and these are used to
++construct the phylogenetic tree which guides the final multiple alignment. The
++scores are calculated from separate pairwise alignments. These can be
++calculated using 2 methods: dynamic programming (slow but accurate) or by the
++method of Wilbur and Lipman (extremely fast but approximate).   
++
++You can choose between the 2 alignment methods using the PAIRWISE ALIGNMENTS
++option. The slow/accurate method is fast enough for short sequences but will be
++VERY SLOW for many (e.g. >100) long (e.g. >1000 residue) sequences.   
++
++
++<STRONG>
++SLOW-ACCURATE alignment parameters:
++</STRONG>
++
++These parameters do not have any effect on the speed of the alignments. They
++are used to give initial alignments which are then rescored to give percent
++identity scores. These % scores are the ones which are displayed on the 
++screen. The scores are converted to distances for the trees.
++
++Gap Open Penalty:      the penalty for opening a gap in the alignment.
++
++Gap Extension Penalty: the penalty for extending a gap by 1 residue.
++
++Protein Weight Matrix: the scoring table which describes the similarity of 
++each amino acid to each other.
++
++Load protein matrix: allows you to read in a comparison table from a file.
++
++DNA weight matrix: the scores assigned to matches and mismatches (including
++IUB ambiguity codes).
++
++Load DNA matrix: allows you to read in a comparison table from a file.
++
++See the Multiple alignment parameters, MATRIX option below for details of the
++matrix input format.
++
++
++<STRONG>
++FAST-APPROXIMATE alignment parameters:
++</STRONG>
++
++These similarity scores are calculated from fast, approximate, global align-
++ments, which are controlled by 4 parameters. 2 techniques are used to make
++these alignments very fast: 1) only exactly matching fragments (k-tuples) are
++considered; 2) only the 'best' diagonals (the ones with most k-tuple matches)
++are used.
++
++GAP PENALTY:   This is a penalty for each gap in the fast alignments. It has
++little effect on the speed or sensitivity except for extreme values.
++
++K-TUPLE SIZE:  This is the size of exactly matching fragment that is used. 
++INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity.
++For longer sequences (e.g. >1000 residues) you may wish to increase the
++default.
++
++TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary
++dot-matrix plot) is calculated. Only the best ones (with most matches) are used
++in the alignment. This parameter specifies how many. Decrease for speed;
++increase for sensitivity.
++
++WINDOW SIZE:  This is the number of diagonals around each of the 'best' 
++diagonals that will be used. Decrease for speed; increase for sensitivity.
++
++
++<STRONG>
++MULTIPLE ALIGNMENT PARAMETERS
++</STRONG>
++
++These parameters control the final multiple alignment. This is the core of the
++program and the details are complicated. To fully understand the use of the
++parameters and the scoring system, you will have to refer to the documentation.
++
++Each step in the final multiple alignment consists of aligning two alignments 
++or sequences. This is done progressively, following the branching order in the
++GUIDE TREE. The basic parameters to control this are two gap penalties and the
++scores for various identical/non-identical residues. 
++
++The GAP OPENING and EXTENSION PENALTIES can be set here. These control the 
++cost of opening up every new gap and the cost of every item in a gap.  
++Increasing the gap opening penalty will make gaps less frequent. Increasing 
++the gap extension penalty will make gaps shorter. Terminal gaps are not 
++penalised.
++
++The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most distantly
++related sequences until after the most closely related sequences have  been
++aligned. The setting shows the percent identity level required to delay the
++addition of a sequence; sequences that are less identical than this level to
++any other sequences will be aligned later.
++
++The TRANSITION WEIGHT gives transitions (A<-->G or C<-->T i.e. purine-purine or
++pyrimidine-pyrimidine substitutions) a weight between 0 and 1; a weight of zero
++means that the transitions are scored as mismatches, while a weight of 1 gives
++the transitions the match score. For distantly related DNA sequences, the
++weight should be near to zero; for closely related sequences it can be useful
++to assign a higher score. The default is set to 0.5.
++
++
++The PROTEIN WEIGHT MATRIX option allows you to choose a series of weight
++matrices. For protein alignments, you use a weight matrix to determine the
++similarity of non-identical amino acids. For example, Tyr aligned with Phe is
++usually judged to be 'better' than Tyr aligned with Pro.
++
++There are three 'in-built' series of weight matrices offered. Each consists of
++several matrices which work differently at different evolutionary distances. To
++see the exact details, read the documentation. Crudely, we store several
++matrices in memory, spanning the full range of amino acid distance (from almost
++identical sequences to highly divergent ones). For very similar sequences, it
++is best to use a strict weight matrix which only gives a high score to
++identities and the most favoured conservative substitutions. For more divergent
++sequences, it is appropriate to use "softer" matrices which give a high score
++to many other frequent substitutions.
++
++1) BLOSUM (Henikoff). These matrices appear to be the best available for 
++carrying out database similarity (homology) searches. The matrices currently
++used are: Blosum 80, 62, 45 and 30. BLOSUM was the default in earlier Clustal X
++versions.
++
++2) PAM (Dayhoff). These have been extremely widely used since the late '70s. We
++currently use the PAM 20, 60, 120, 350 matrices.
++
++3) GONNET. These matrices were derived using almost the same procedure as the
++Dayhoff one (above) but are much more up to date and are based on a far larger
++data set. They appear to be more sensitive than the Dayhoff series. We
++currently use the GONNET 80, 120, 160, 250 and 350 matrices. This series is the
++default for Clustal X version 1.8.
++
++We also supply an identity matrix which gives a score of 10 to two identical 
++amino acids and a score of zero otherwise. This matrix is not very useful.
++
++Load protein matrix: allows you to read in a comparison matrix from a file.
++This can be either a single matrix or a series of matrices (see below for
++format). 
++
++
++DNA WEIGHT MATRIX option allows you to select a single matrix (not a series)
++used for aligning nucleic acid sequences. Two hard-coded matrices are available:
++
++1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
++of nucleic acid sequences. X's and N's are treated as matches to any IUB
++ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.
++
++2) CLUSTALW(1.6). A previous system used by ClustalW, in which matches score
++1.0 and mismatches score 0. All matches for IUB symbols also score 0.
++
++Load DNA matrix: allows you to read in a nucleic acid comparison matrix from a
++file (just one matrix, not a series).
++
++
++SINGLE MATRIX INPUT FORMAT
++The format used for a single matrix is the same as the BLAST program. The
++scores in the new weight matrix should be similarities. You can use negative as
++well as positive values if you wish, although the matrix will be automatically
++adjusted to all positive scores, unless the NEGATIVE MATRIX option is selected.
++Any lines beginning with a # character are assumed to be comments. The first
++non-comment line should contain a list of amino acids in any order, using the 1
++letter code, followed by a * character. This should be followed by a square
++matrix of scores, with one row and one column for each amino acid. The last row
++and column of the matrix (corresponding to the * character) contain the minimum
++score over the whole matrix.
++
++MATRIX SERIES INPUT FORMAT
++ClustalX uses different matrices depending on the mean percent identity of the
++sequences to be aligned. You can specify a series of matrices and the range of
++the percent identity for each matrix in a matrix series file. The file is
++automatically recognised by the word CLUSTAL_SERIES at the beginning of the
++file. Each matrix in the series is then specified on one line which should
++start with the word MATRIX. This is followed by the lower and upper limits of
++the sequence percent identities for which you want to apply the matrix. The
++final entry on the matrix line is the filename of a Blast format matrix file
++(see above for details of the single matrix file format).
++
++Example.
++
++CLUSTAL_SERIES
++ 
++MATRIX 81 100 /us1/user/julie/matrices/blosum80
++MATRIX 61 80 /us1/user/julie/matrices/blosum62
++MATRIX 31 60 /us1/user/julie/matrices/blosum45
++MATRIX 0 30 /us1/user/julie/matrices/blosum30
++
++
++<STRONG>
++PROTEIN GAP PARAMETERS
++</STRONG>
++
++RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce or
++increase the gap opening penalties at each position in the alignment or 
++sequence. See the documentation for details. As an example, positions that are
++rich in glycine are more likely to have an adjacent gap than positions that are
++rich in valine.
++
++HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within a
++run (5 or more residues) of hydrophilic amino acids; these are likely to be
++loop or random coil regions where gaps are more common. The residues that are
++"considered" to be hydrophilic can be entered in HYDROPHILIC RESIDUES.
++
++GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too close
++to each other. Gaps that are less than this distance apart are penalised more
++than other gaps. This does not prevent close gaps; it makes them less frequent,
++promoting a block-like appearance of the alignment.
++
++END GAP SEPARATION treats end gaps just like internal gaps for the purposes of
++avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above). If you
++turn this off, end gaps will be ignored for this purpose. This is useful when
++you wish to align fragments where the end gaps are not biologically meaningful.
++
++
++>>HELP P <<
++                   Profile and Structure Alignments
++   
++By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile 
++alignments allow you to store alignments of your favourite sequences and add
++new sequences to them in small bunches at a time. A profile is simply an
++alignment of one or more sequences (e.g. an alignment output file from Clustal
++X). Each input can be a single sequence. One or both sets of input sequences
++may include secondary structure assignments or gap penalty masks to guide the
++alignment. 
++
++Make sure PROFILE ALIGNMENT MODE is selected, using the switch directly above
++the sequence display area. Then, use the ALIGNMENT menu to do profile and
++secondary structure alignments.
++
++The profiles can be in any of the allowed input formats with "-" characters
++used to specify gaps (except for GCG/MSF where "." is used).
++
++You have to load the 2 profiles by choosing FILE, LOAD PROFILE 1 and  LOAD
++PROFILE 2. Then ALIGNMENT, ALIGN PROFILE 2 TO PROFILE 1 will align the 2
++profiles to each other. Secondary structure masks in either profile can be used
++to guide the alignment. This option compares all the sequences in profile 1
++with all the sequences in profile 2 in order to build guide trees which will be
++used to calculate sequence weights, and select appropriate alignment parameters
++for the final profile alignment.
++
++You can skip the first stage (pairwise alignments; guide trees) by using old
++guide tree files (ALIGN PROFILES FROM GUIDE TREES). 
++
++The ALIGN SEQUENCES TO PROFILE 1 option will take the sequences in the second
++profile and align them to the first profile, 1 at a time.  This is useful to
++add some new sequences to an existing alignment, or to align a set of sequences
++to a known structure. In this case, the second profile set need not be
++pre-aligned.
++
++You can skip the first stage (pairwise alignments; guide tree) by using an old
++guide tree file (ALIGN SEQUENCES TO PROFILE 1 FROM TREE). 
++
++SAVE LOG FILE will write the alignment calculation scores to a file. The log
++filename is the same as the input sequence filename, with an extension .log
++appended.
++
++The alignment parameters can be set using the ALIGNMENT PARAMETERS menu,
++Pairwise Parameters, Multiple Parameters and Protein Gap Parameters options.
++These are EXACTLY the same parameters as used by the general, automatic
++multiple alignment procedure. The general multiple alignment procedure is
++simply a series of profile alignments. Carrying out a series of profile
++alignments on larger and larger groups of sequences, allows you to manually
++build up a complete alignment, if necessary editing intermediate alignments.
++
++<STRONG>
++SECONDARY STRUCTURE PARAMETERS
++</STRONG>
++
++Use this menu to set secondary structure options. If a solved structure is
++known, it can be used to guide the alignment by raising gap penalties within
++secondary structure elements, so that gaps will preferentially be inserted into
++unstructured surface loop regions. Alternatively, a user-specified gap penalty
++mask can be supplied for a similar purpose.
++
++A gap penalty mask is a series of numbers between 1 and 9, one per position in 
++the alignment. Each number specifies how much the gap opening penalty is to be 
++raised at that position (raised by multiplying the basic gap opening penalty
++by the number) i.e. a mask figure of 1 at a position means no change
++in gap opening penalty; a figure of 4 means that the gap opening penalty is
++four times greater at that position, making gaps 4 times harder to open.
++
++The format for gap penalty masks and secondary structure masks is explained in
++a separate help section.
++
++>>HELP B << 
++            Secondary Structure / Gap Penalty Masks
++
++The use of secondary structure-based penalties has been shown to improve  the
++accuracy of sequence alignment. Clustal X now allows secondary structure/ gap
++penalty masks to be supplied with the input sequences used during profile
++alignment. (NB. The secondary structure information is NOT used during multiple
++sequence alignment). The masks work by raising gap penalties in specified
++regions (typically secondary structure elements) so that gaps are
++preferentially opened in the less well conserved regions (typically surface
++loops).
++
++The USE PROFILE 1(2) SECONDARY STRUCTURE / GAP PENALTY MASK options control
++whether the input 2D-structure information or gap penalty masks will be used
++during the profile alignment.
++
++The OUTPUT options control whether the secondary structure and gap penalty
++masks should be included in the Clustal X output alignments. Showing both is
++useful for understanding how the masks work. The 2D-structure information is
++itself useful in judging the alignment quality and in seeing how residue
++conservation patterns vary with secondary structure. 
++
++The HELIX and STRAND GAP PENALTY options provide the value for raising the gap
++penalty at core Alpha Helical (A) and Beta Strand (B) residues. In CLUSTAL
++format, capital residues denote the A and B core structure notation. Basic gap
++penalties are multiplied by the amount specified.
++
++The LOOP GAP PENALTY option provides the value for the gap penalty in Loops.
++By default this penalty is not raised. In CLUSTAL format, loops are specified
++by "." in the secondary structure notation.
++
++The SECONDARY STRUCTURE TERMINAL PENALTY provides the value for setting the gap
++penalty at the ends of secondary structures. Ends of secondary structures are
++known to grow or shrink, comparing related structures. Therefore by default
++these are given intermediate values, lower than the core penalties. All
++secondary structure read in as lower case in CLUSTAL format gets the reduced
++terminal penalty.
++
++The HELIX and STRAND TERMINAL POSITIONS options specify the range of structure
++termini for the intermediate penalties. In the alignment output, these are
++indicated as lower case. For Alpha Helices, by default, the range spans the 
++end-helical turn (3 residues). For Beta Strands, the default range spans the
++end residue and the adjacent loop residue, since sequence conservation often
++extends beyond the actual H-bonded Beta Strand.
++
++Clustal X can read the masks from SWISS-PROT, CLUSTAL or GDE format input
++files. For many 3-D protein structures, secondary structure information is
++recorded in the feature tables of SWISS-PROT database entries. You should
++always check that the assignments are correct - some are quite inaccurate.
++Clustal X looks for SWISS-PROT HELIX and STRAND assignments e.g.
++
++
++<PRE>
++FT   HELIX       100    115
++FT   STRAND      118    119
++</PRE>
++
++The structure and penalty masks can also be read from CLUSTAL alignment format 
++as comment lines beginning "!SS_" or "!GM_" e.g.
++
++<PRE>
++!SS_HBA_HUMA    ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA
++!GM_HBA_HUMA    112224444444444222122244444444442222224222111111111222444444
++HBA_HUMA        VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
++</PRE>
++
++Note that the mask itself is a set of numbers between 1 and 9 each of which is 
++assigned to the residue(s) in the same column below. 
++
++In GDE flat file format, the masks are specified as text and the names must
++begin with "SS_" or "GM_".
++
++Either a structure or penalty mask or both may be used. If both are included
++in an alignment, the user will be asked which is to be used.
++
++
++>>HELP T <<
++                            Phylogenetic Trees
++
++Before calculating a tree, you must have an ALIGNMENT in memory. This can be
++input using the FILE menu, LOAD SEQUENCES option or you should have just
++carried out a full multiple alignment and the alignment is still in memory.
++Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!!
++
++The method used is the NJ (Neighbour Joining) method of Saitou and Nei. First
++you calculate distances (percent divergence) between all pairs of sequences from
++a multiple alignment; second you apply the NJ method to the distance matrix.
++
++To calculate a tree, use the DRAW N-J TREE option. This gives an UNROOTED tree
++and all branch lengths. The root of the tree can only be inferred by using an
++outgroup (a sequence that you are certain branches at the outside of the tree
++.... certain on biological grounds) OR if you assume a degree of constancy in
++the 'molecular clock', you can place the root in the 'middle' of the tree
++(roughly equidistant from all tips).
++
++BOOTSTRAP N-J TREE uses a method for deriving confidence values for the 
++groupings in a tree (first adapted for trees by Joe Felsenstein). It involves
++making N random samples of sites from the alignment (N should be LARGE, e.g.
++500 - 1000); drawing N trees (1 from each sample) and counting how many times
++each grouping from the original tree occurs in the sample trees. You can set N
++using the NUMBER OF BOOTSTRAP TRIALS option in the BOOTSTRAP TREE window. In
++practice, you should use a large number of bootstrap replicates (1000 is
++recommended, even if it means running the program for an hour on a slow 
++computer). You can also supply a seed number for the random number generator
++here. Different runs with the same seed will give the same answer. See the
++documentation for more details.
++
++EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where
++ANY of the sequences have a gap will be ignored. This means that 'like' will
++be compared to 'like' in all distances, which is highly desirable. It also
++automatically throws away the most ambiguous parts of the alignment, which are
++concentrated around gaps (usually). The disadvantage is that you may throw away
++much of the data if there are many gaps (which is why it is difficult for us to
++make it the default).  
++
++CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this option
++makes no difference. For greater divergence, this option corrects for the fact
++that observed distances underestimate actual evolutionary distances. This is
++because, as sequences diverge, more than one substitution will happen at many
++sites. However, you only see one difference when you look at the present day
++sequences. Therefore, this option has the effect of stretching branch lengths
++in trees (especially long branches). The corrections used here (for DNA or
++proteins) are both due to Motoo Kimura. See the documentation for details.  
++
++Where possible, this option should be used. However, for VERY divergent
++sequences, the distances cannot be reliably corrected. You will be warned if
++this happens. Even if none of the distances in a data set exceed the reliable
++threshold, if you bootstrap the data, some of the bootstrap distances may
++randomly exceed the safe limit.  
++
++SAVE LOG FILE will write the tree calculation scores to a file. The log
++filename is the same as the input sequence filename, with an extension .log
++appended.
++
++<H4>
++OUTPUT FORMAT OPTIONS
++</H4>
++
++Four different formats are allowed. None of these displays the tree visually.
++You can display the tree using the NJPLOT program distributed with Clustal X
++OR get the PHYLIP package and use the tree drawing facilities there. 
++ 
++1) CLUSTAL FORMAT TREE. This format is verbose and lists all of the distances
++between the sequences and the number of alignment positions used for each. The
++tree is described at the end of the file. It lists the sequences that are
++joined at each alignment step and the branch lengths. After two sequences are
++joined, the pair is referred to later as a NODE. The number of a NODE is the
++number of the lowest sequence in that NODE.   
++
++2) PHYLIP FORMAT TREE. This format is the New Hampshire format, used by many
++phylogenetic analysis packages. It consists of a series of nested parentheses,
++describing the branching order, with the sequence names and branch lengths. It
++can be read by the NJPLOT program distributed with ClustalX. It can also be
++used by the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see
++the trees graphically. This is the same format used during multiple alignment
++for the guide trees. Some other packages that can read and display New
++Hampshire format are TreeTool, TreeView, and Phylowin.
++
++3) PHYLIP DISTANCE MATRIX. This format just outputs a matrix of all the
++pairwise distances in a format that can be used by the PHYLIP package. It used
++to be useful when one could not produce distances from protein sequences in the
++Phylip package but is now redundant (PROTDIST of Phylip 3.5 now does this).
++
++4) NEXUS FORMAT TREE. This format is used by several popular phylogeny programs,
++including PAUP and MacClade. The format is described fully in:
++Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
++NEXUS: an extensible file format for systematic information.
++Systematic Biology 46:590-621.
++
++BOOTSTRAP LABELS ON: By default, the bootstrap values are correctly placed on
++the tree branches of the phylip format output tree. The toggle allows them to
++be placed on the nodes, which is incorrect, but some display packages (e.g.
++TreeTool, TreeView and Phylowin) only support node labelling but not branch
++labelling. Care should be taken to note which branches and labels go together. 
++
++
++>>HELP C <<
++                               Colors
++
++Clustal X provides a versatile coloring scheme for the sequence alignment 
++display. The sequences (or profiles) are colored automatically, when they are
++loaded. Sequences can be colored either by assigning a color to specific
++residues, or on the basis of an alignment consensus. In the latter case, the
++alignment consensus is calculated automatically, and the residues in each
++column are colored according to the consensus character assigned to that
++column. In this way, you can choose to highlight, for example, conserved
++hydrophilic or hydrophobic positions in the alignment.
++
++The 'rules' used to color the alignment are specified in a COLOR PARAMETER
++FILE. Clustal X automatically looks for a file called 'colprot.par' for protein
++sequences or 'coldna.par' for DNA, in the current directory. (If you are running
++under UNIX, it then looks in your home directory, and finally in the
++directories in your PATH environment variable).
++
++By default, if no color parameter file is found, protein sequences are colored
++by residue as follows:
++
++<PRE>
++	Color			Residue Code
++
++	ORANGE			GPST
++	RED			HKR
++	BLUE			FWY
++	GREEN			ILMV
++</PRE>
++
++In the case of DNA sequences, the default colors are as follows:
++
++<PRE>
++	Color			Residue Code
++
++	ORANGE			A
++	RED			C
++	BLUE			T
++	GREEN			G
++</PRE>
++
++
++The default BACKGROUND COLORING option shows the sequence residues using a
++black character on a colored background. It can be switched off to show
++residues as a colored character on a white background. 
++
++Either BLACK AND WHITE or DEFAULT COLOR options can be selected. The Color
++option looks first for the color parameter file (as described above) and, if no
++file is found, uses the default residue-specific colors.
++
++You can specify your own coloring scheme by using the LOAD COLOR PARAMETER FILE
++option. The format of the color parameter file is described below.
++
++<H4>
++COLOR PARAMETER FILE
++</H4>
++
++This file is divided into 3 sections:
++
++1) the names and rgb values of the colors
++2) the rules for calculating the consensus
++3) the rules for assigning colors to the residues
++ 
++An example file is given here.
++
++<PRE>
++ --------------------------------------------------------------------
++@rgbindex
++RED          0.9 0.1 0.1
++BLUE         0.1 0.1 0.9
++GREEN        0.1 0.9 0.1
++YELLOW       0.9 0.9 0.0
++
++@consensus
++% = 60% w:l:v:i:m:a:f:c:y:h:p
++# = 80% w:l:v:i:m:a:f:c:y:h:p
++- = 50% e:d
+++ = 60% k:r
++q = 50% q:e
++p = 50% p
++n = 50% n
++t = 50% t:s
++
++@color
++g = RED
++p = YELLOW
++t = GREEN if t:%:#
++n = GREEN if n
++w = BLUE if %:#:p
++k = RED if +
++ --------------------------------------------------------------------
++</PRE>
++
++The first section is optional and is identified by the header @rgbindex. If
++this section exists, each color used in the file must be named and the rgb
++values specified (on a scale from 0 to 1). If the rgb index section is not
++found, the following set of hard-coded colors will be used.
++
++<PRE>
++RED          0.9 0.1 0.1
++BLUE         0.1 0.1 0.9
++GREEN        0.1 0.9 0.1
++ORANGE       0.9 0.7 0.3
++CYAN         0.1 0.9 0.9
++PINK         0.9 0.5 0.5
++MAGENTA      0.9 0.1 0.9
++YELLOW       0.9 0.9 0.0
++</PRE>
++
++The second section is optional and is identified by the header @consensus. It
++defines how the consensus is calculated.
++ 
++The format of each consensus parameter is:-
++ 
++<PRE>
++c = n% residue_list
++ 
++        where
++              c             is a character used to identify the parameter.
++              n             is an integer value used as the percentage cutoff
++                            point.
++              residue_list  is a list of residues denoted by a single
++                            character, delimited by a colon (:).
++</PRE>
++ 
++For example:   # = 60% w:l:v:i
++
++will assign a consensus character # to any column in the alignment which
++contains more than 60% of the residues w,l,v and i.
++        
++ 
++The third section is identified by the header @color, and defines how colors
++are assigned to each residue in the alignment.
++ 
++The color parameters can take one of two formats:
++
++<PRE>
++1) r = color
++2) r = color if consensus_list
++ 
++        where
++              r             is a character used to denote a residue.
++              color         is one of the colors in the GDE color lookup table.
++              consensus_list is a list of consensus parameters denoted by a
++                            single character, delimited by a colon (:).
++</PRE>
++ 
++Examples:
++1) g = ORANGE
++
++will color all glycines ORANGE, regardless of the consensus.
++
++2) w = BLUE if w:%:#
++
++will color BLUE any tryptophan which is found in a column with a consensus of
++w, % or #.
++ 
++
++>>HELP Q <<
++                       Alignment Quality Analysis
++
++<H3>
++QUALITY SCORES
++</H3>
++--------------
++
++Clustal X provides an indication of the quality of an alignment by plotting
++a 'conservation score' for each column of the alignment. A high score indicates
++a well-conserved column; a low score indicates low conservation. The quality
++curve is drawn below the alignment.
++
++Two methods are also provided to indicate single residues or sequence segments
++which score badly in the alignment.
++ 
++Low-scoring residues are expected to occur at a moderate frequency in all the
++sequences because of their steady divergence due to the natural processes of
++evolution. The most divergent sequences are likely to have the most outliers.
++However, the highlighted residues are especially useful in pointing to
++sequence misalignments. Note that clustering of highlighted residues is a
++strong indication of misalignment. This can arise due to various reasons, for
++example:
++ 
++        1. Partial or total misalignments caused by a failure in the
++        alignment algorithm. Usually only in difficult alignment cases.
++ 
++        2. Partial or total misalignments because at least one of the
++        sequences in the given set is partly or completely unrelated to the
++        other sequences. It is up to the user to check that the set of
++        sequences are alignable.
++
++        3. Frameshift translation errors in a protein sequence causing local
++        mismatched regions to be heavily highlighted. These are surprisingly
++        common in database entries. If suspected, a 3-frame translation of
++        the source DNA needs to be examined.
++ 
++Occasionally, highlighted residues may point to regions of some biological
++significance. This might happen for example if a protein alignment contains a
++sequence which has acquired new functions relative to the main sequence set. It
++is important to exclude other explanations, such as error or the natural
++divergence of sequences, before invoking a biological explanation.
++
++
++<H3>
++LOW-SCORING SEGMENTS
++</H3>
++--------------------
++
++Unreliable regions in the alignment can be highlighted using the Low-Scoring
++Segments option. A sequence-weighted profile is used to indicate any segments
++in the sequences which score badly. Because the profile calculation may take
++some time, an option is provided to calculate LOW-SCORING SEGMENTS. The 
++segment display can then be toggled on or off without having to repeat the
++time-consuming calculations.
++
++For details of the low-scoring segment calculation, see the CALCULATION section
++below.
++
++
++<H4>
++LOW-SCORING SEGMENT PARAMETERS
++</H4>
++------------------------------
++
++MINIMUM LENGTH OF SEGMENTS: short segments (or even single residues) can be
++hidden by increasing the minimum length of segments which will be displayed.
++
++DNA MARKING SCALE is used to remove less significant segments from the 
++highlighted display. Increase the scale to display more segments; decrease the
++scale to remove the least significant.
++
++
++PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of each
++amino acid to each other. The matrix is used to calculate the sequence-
++weighted profile scores. There are four 'in-built' Log-Odds matrices offered:
++the Gonnet PAM 80, 120, 250, 350 matrices. A more stringent matrix which only
++gives a high score to identities and the most favoured conservative
++substitutions, may be more suitable when the sequences are closely related. For
++more divergent sequences, it is appropriate to use "softer" matrices which give
++a high score to many other frequent substitutions. This  option automatically
++recalculates the low-scoring segments.
++
++
++DNA WEIGHT MATRIX: Two hard-coded matrices are available:
++
++1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
++of nucleic acid sequences. X's and N's are treated as matches to any IUB
++ambiguity symbol. All matches score 1.0; all mismatches for IUB symbols score
++0.9.
++
++2) CLUSTALW(1.6). The previous system used by ClustalW, in which matches score
++1.0 and mismatches score 0. All matches for IUB symbols also score 0. 
++
++A new matrix can be read from a file on disk, if the filename consists only
++of lower case characters. The values in the new weight matrix should be
++similarities and should be NEGATIVE for infrequent substitutions.
++ 
++INPUT FORMAT. The format used for a new matrix is the same as the BLAST
++program. Any lines beginning with a # character are assumed to be comments. The
++first non-comment line should contain a list of amino acids in any order, using
++the 1 letter code, followed by a * character. This should be followed by a
++square matrix of scores, with one row and one column for each amino acid. The
++last row and column of the matrix (corresponding to the * character) contain
++the minimum score over the whole matrix.
++
++<H4>
++QUALITY SCORE PARAMETERS
++</H4>
++------------------------
++
++You can customise the column 'quality scores' plotted underneath the alignment
++display using the following options.
++
++SCORE PLOT SCALE: this is a scalar value from 1 to 10, which can be used to
++change the scale of the quality score plot. 
++
++RESIDUE EXCEPTION CUTOFF: this is a scalar value from 1 to 10, which can be
++used to change the number of residue exceptions which are highlighted in the
++alignment display. (For an explanation of this cutoff, see the CALCULATION OF
++RESIDUE EXCEPTIONS section below.)
++
++PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of 
++each amino acid to each other. 
++ 
++DNA WEIGHT MATRIX: two hard-coded matrices are available: IUB and CLUSTALW(1.6).
++
++For more information about the weight matrices, see the help above for
++the Low-scoring Segments Weight Matrix.
++
++For details of the quality score calculations, see the CALCULATION section
++below.
++
++
++<STRONG>
++SHOW LOW-SCORING SEGMENTS
++</STRONG>
++                       
++The low-scoring segment display can be toggled on or off. This option does not
++recalculate the profile scores.
++
++
++<STRONG>
++SHOW EXCEPTIONAL RESIDUES
++</STRONG>
++                       
++This option highlights individual residues which score badly in the alignment
++quality calculations. Residues which score exceptionally low are highlighted by
++using a white character on a grey background.
++
++<STRONG>
++SAVE QUALITY SCORES TO FILE
++</STRONG>
++
++The quality scores that are plotted underneath the alignment display can also
++be saved in a text file. Each column in the alignment is written on one line in
++the output file, with the value of the quality score at the end of the line.
++Only the sequences currently selected in the display are written to the file.
++One use for quality scores is to color residues in a protein structure by
++sequence conservation. In this way conserved surface residues can be
++highlighted to locate functional regions such as ligand-binding sites.
++
++
++<H3>
++CALCULATION OF QUALITY SCORES
++</H3>
++-----------------------------
++
++Suppose we have an alignment of m sequences of length n. Then, the alignment
++can be written as:
++
++<PRE>
++        A11 A12 A13 .......... A1n
++        A21 A22 A23 .......... A2n
++        .
++        .
++        Am1 Am2 Am3 .......... Amn
++</PRE>
++
++We also have a residue comparison matrix of size R where C(i,j) is the score
++for aligning residue i with residue j.
++
++We want to calculate a score for the conservation of the jth position in the
++alignment.
++
++To do this, we define an R-dimensional sequence space. For the jth position in 
++the alignment, each sequence consists of a single residue which is assigned a
++point S in the space. S has R dimensions, and for sequence i, the rth dimension
++is defined as:
++
++<PRE>
++	Sr =    C(r,Aij)
++</PRE>
++
++We then calculate a consensus value for the jth position in the alignment. This
++value X also has R dimensions, and the rth dimension is defined as:
++
++<PRE>
++	Xr = (   SUM   (Fij * C(i,r)) ) / m
++               1<=i<=R
++</PRE>
++
++where Fij is the count of residues i at position j in the alignment.
++
++Now we can calculate the distance Di between each sequence i and the consensus 
++position X in the R-dimensional space.
++
++<PRE>
++	Di = SQRT   (   SUM   (Xr - Sr)(Xr - Sr) )
++                      1<=r<=R
++
++</PRE>
++
++The quality score for the jth position in the alignment is defined as the mean
++of the sequence distances Di.
++
++The score is normalised by multiplying by the percentage of sequences which
++have residues (and not gaps) at this position.
++
++<H3>
++CALCULATION OF RESIDUE EXCEPTIONS
++</H3>
++---------------------------------
++
++The jth residue of the ith sequence is considered as an exception if the
++distance Di of the sequence from the consensus value X is greater than (Upper
++Quartile + Inter Quartile Range * Cutoff). The value used as a cutoff for
++displaying exceptions can be set from the SCORE PARAMETERS menu. A high cutoff
++value will only display very significant exceptions; a low value will allow
++more, less significant, exceptions to be highlighted.
++
++(NB. Sequences which contain gaps at this position are not included in the
++exception calculation.)
++
++
++<H3>
++CALCULATION OF LOW-SCORING SEGMENTS
++</H3>
++-----------------------------------
++
++Suppose we have an alignment of m sequences of length n. Then, the alignment
++can be written as:
++
++<PRE>
++        A11 A12 A13 .......... A1n
++        A21 A22 A23 .......... A2n
++        .
++        .
++        Am1 Am2 Am3 .......... Amn
++</PRE>
++
++We also have a residue comparison matrix of size R where C(i,j) is the score
++for aligning residue i with residue j.
++
++We calculate sequence weights by building a neighbour-joining tree, in which
++branch lengths are proportional to divergence. Summing the branches by branch
++ownership provides the weights. See Thompson et al., CABIOS, 10, 19 (1994) and
++Henikoff et al., JMB, 243, 574 (1994).
++
++To find the low-scoring segments in a sequence Si, we build a weighted profile
++of the remaining sequences in the alignment. Suppose we find residue r at 
++position j in the sequence; then the score for the jth position in the sequence
++is defined as
++
++<PRE>
++	Score(Si,j) = Profile(j,r)   where Profile(j,r) is the profile score
++                                       for residue r at position j in the
++                                       alignment.
++</PRE>
++
++These residue scores are summed along the sequence in both forward and backward
++directions. If the sum of the scores is positive, then it is reset to zero.
++Segments which score negatively in both directions are considered as 
++'low-scoring' and will be highlighted in the alignment display.
++
++
++>>HELP 9 <<
++              Command Line Parameters
++
++                DATA (sequences)
++
++-INFILE=file.ext                             :input sequences
++-PROFILE1=file.ext  and  -PROFILE2=file.ext  :profiles (aligned sequences)
++
++
++                VERBS (do things)
++
++-OPTIONS	    :list the command line parameters
++-HELP  or -CHECK    :outline the command line parameters
++-ALIGN              :do full multiple alignment 
++-TREE               :calculate NJ tree
++-BOOTSTRAP(=n)      :bootstrap a NJ tree (n= number of bootstraps; def. = 1000)
++-CONVERT            :output the input sequences in a different file format
++
++
++                PARAMETERS (set things)
++
++***General settings:****
++-INTERACTIVE :read command line, then enter normal interactive menus
++-QUICKTREE   :use FAST algorithm for the alignment guide tree
++-TYPE=       :PROTEIN or DNA sequences
++-NEGATIVE    :protein alignment with negative values in matrix
++-OUTFILE=    :sequence alignment file name
++-OUTPUT=     :GCG, GDE, PHYLIP, PIR or NEXUS
++-OUTORDER=   :INPUT or ALIGNED
++-CASE=       :LOWER or UPPER (for GDE output only)
++-SEQNOS=     :OFF or ON (for Clustal output only)
++
++
++***Fast Pairwise Alignments:***
++-KTUPLE=n      :word size
++-TOPDIAGS=n  :number of best diags.
++-WINDOW=n    :window around best diags.
++-PAIRGAP=n   :gap penalty
++-SCORE=      :PERCENT or ABSOLUTE
++
++
++***Slow Pairwise Alignments:***
++-PWMATRIX=    :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
++-PWDNAMATRIX= :DNA weight matrix=IUB, CLUSTALW or filename
++-PWGAPOPEN=f  :gap opening penalty
++-PWGAPEXT=f  :gap extension penalty
++ 
++
++***Multiple Alignments:***
++-NEWTREE=    :file for new guide tree
++-USETREE=    :file for old guide tree
++-MATRIX=     :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
++-DNAMATRIX=  :DNA weight matrix=IUB, CLUSTALW or filename
++-GAPOPEN=f   :gap opening penalty
++-GAPEXT=f  :gap extension penalty
++-ENDGAPS     :no end gap separation pen.
++-GAPDIST=n   :gap separation pen. range
++-NOPGAP      :residue-specific gaps off
++-NOHGAP    :hydrophilic gaps off
++-HGAPRESIDUES= :list hydrophilic res.
++-MAXDIV=n    :% ident. for delay
++-TYPE=       :PROTEIN or DNA
++-TRANSWEIGHT=f :transitions weighting
++
++
++***Profile Alignments:***
++-PROFILE      :Merge two alignments by profile alignment
++-NEWTREE1=    :file for new guide tree for profile1
++-NEWTREE2=    :file for new guide tree for profile2
++-USETREE1=    :file for old guide tree for profile1
++-USETREE2=    :file for old guide tree for profile2
++
++
++***Sequence to Profile Alignments:***
++-SEQUENCES   :Sequentially add profile2 sequences to profile1 alignment
++-NEWTREE=    :file for new guide tree
++-USETREE=    :file for old guide tree
++
++
++***Structure Alignments:***
++-NOSECSTR1     :do not use secondary structure/gap penalty mask for profile 1 
++-NOSECSTR2     :do not use secondary structure/gap penalty mask for profile 2
++-SECSTROUT=STRUCTURE or MASK or BOTH or NONE  :output in alignment file
++-HELIXGAP=n    :gap penalty for helix core residues 
++-STRANDGAP=n   :gap penalty for strand core residues
++-LOOPGAP=n     :gap penalty for loop regions
++-TERMINALGAP=n :gap penalty for structure termini
++-HELIXENDIN=n  :number of residues inside helix to be treated as terminal
++-HELIXENDOUT=n :number of residues outside helix to be treated as terminal
++-STRANDENDIN=n :number of residues inside strand to be treated as terminal
++-STRANDENDOUT=n:number of residues outside strand to be treated as terminal 
++
++
++***Trees:***
++-OUTPUTTREE=nj OR phylip OR dist OR nexus
++-SEED=n    :seed number for bootstraps
++-KIMURA      :use Kimura's correction
++-TOSSGAPS  :ignore positions with gaps
++-BOOTLABELS=node OR branch :position of bootstrap values in tree display
++
++
++>>HELP R <<
++                             References
++
++<STRONG>
++The ClustalX program is described in the manuscript:
++</STRONG>
++
++Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
++The ClustalX windows interface: flexible strategies for multiple sequence 
++alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
++
++
++<STRONG>
++The ClustalW program is described in the manuscript:
++</STRONG>
++
++Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
++sensitivity of progressive multiple sequence alignment through sequence
++weighting, positions-specific gap penalties and weight matrix choice.  Nucleic
++Acids Research, 22:4673-4680.
++
++
++<STRONG>
++The ClustalV program is described in the manuscript:
++</STRONG>
++
++Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
++multiple sequence alignment. CABIOS 8,189-191.
++
++
++<STRONG>
++The original Clustal program is described in the manuscripts:
++</STRONG>
++
++Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
++alignments on a microcomputer.
++CABIOS 5,151-153.
++
++Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
++sequence alignment on a microcomputer. Gene 73,237-244.
++
++-------------------------------------------------------------------------------
++<STRONG>
++Some tips on using Clustal X:
++</STRONG>
++
++Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
++Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
++
++<STRONG>
++Some tips on using Clustal W:
++</STRONG>
++
++Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
++multiple sequence alignments. Methods Enzymol., 266, 383-402.
++
++-------------------------------------------------------------------------------
++<STRONG>
++You can get the latest version of the ClustalX program by anonymous ftp to:
++</STRONG>
++
++ftp-igbmc.u-strasbg.fr
++ftp.embl-heidelberg.de
++ftp.ebi.ac.uk
++
++<STRONG>
++Or, have a look at the following WWW site:
++</STRONG>
++
++http://www-igbmc.u-strasbg.fr/BioInfo/
++
++
+ This is the on-line help file for Clustal X (version 1.83), using the NCBI
+ Vibrant Toolkit.   
+ 
--- clustalx-1.83.orig/debian/patches/xmenu.c.patch
+++ clustalx-1.83/debian/patches/xmenu.c.patch
@@ -0,0 +1,13 @@
+Index: xmenu.c
+===================================================================
+--- ./xmenu.c	(révision 173)
++++ ./xmenu.c	(révision 174)
+@@ -4411,7 +4411,7 @@
+ 	while(TRUE) {
+ 		if(fgets(temp,MAXLINE+1,fd) == NULL) {
+ 			if(!found_help)
+-				error("No help found in help file");
++				error("No help found in help file [%s]",help_file);
+ 			fclose(fd);
+ 			return;
+ 		}
--- clustalx-1.83.orig/debian/patches/amenu.c.patch
+++ clustalx-1.83/debian/patches/amenu.c.patch
@@ -0,0 +1,130 @@
+Index: clustalw-1.83/amenu.c
+===================================================================
+--- clustalw-1.83.orig/amenu.c
++++ clustalw-1.83/amenu.c
+@@ -184,7 +184,7 @@
+ 		fprintf(stdout,"     H. HELP\n");
+ 		fprintf(stdout,"     X. EXIT (leave program)\n\n\n");
+ 		
+-		getstr("Your choice",lin1);
++		getstr("Your choice",MAXLINE+1,lin1);
+ 
+ 		switch(toupper(*lin1)) {
+ 			case '1': seq_input(FALSE);
+@@ -268,7 +268,7 @@
+         fprintf(stdout,"    H.  HELP\n");
+         fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
+ 
+-        getstr("Your choice",lin1);
++        getstr("Your choice",MAXLINE+1,lin1);
+         if(*lin1 == EOS) return;
+ 
+         switch(toupper(*lin1))
+@@ -361,7 +361,7 @@
+         fprintf(stdout,"    H.  HELP\n");
+         fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
+ 
+-        getstr("Your choice",lin1);
++        getstr("Your choice",MAXLINE+1,lin1);
+         if(*lin1 == EOS) return;
+ 
+         switch(toupper(*lin1))
+@@ -457,7 +457,7 @@
+ 		fprintf(stdout,"\n\n");
+ 		fprintf(stdout,"     H. HELP\n\n\n");
+ 		
+-		getstr("Enter number (or [RETURN] to exit)",lin2);
++		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+ 		if( *lin2 == EOS) { 
+ 			return;
+ 		}
+@@ -533,7 +533,7 @@
+                 fprintf(stdout,"--\n");
+ 
+ 
+-                getstr("\n\nEnter number (or [RETURN] to exit)",lin2);
++                getstr("\n\nEnter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+                 if(*lin2 == EOS) return(output_struct_penalties);
+ 
+         	switch(toupper(*lin2))
+@@ -602,7 +602,7 @@
+         fprintf(stdout,"    H.  HELP\n");
+         fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
+ 
+-        getstr("Your choice",lin1);
++        getstr("Your choice",MAXLINE+1,lin1);
+         if(*lin1 == EOS) return;
+ 
+         switch(toupper(*lin1))
+@@ -677,7 +677,7 @@
+ 	fprintf(stdout,"\n");
+ 	fprintf(stdout,"     H. HELP\n\n\n");	
+ 	
+-		getstr("Enter number (or [RETURN] to exit)",lin2);
++		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+ 		if(*lin2 == EOS) return;
+ 		
+ 		switch(toupper(*lin2)) {
+@@ -766,7 +766,7 @@
+ 	fprintf(stdout,"\n");
+ 	fprintf(stdout,"     H. HELP\n\n\n");	
+ 	
+-		getstr("Enter number (or [RETURN] to exit)",lin2);
++		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+ 		if(*lin2 == EOS) return;
+ 		
+ 		switch(toupper(*lin2)) {
+@@ -907,7 +907,7 @@
+ 
+ 		fprintf(stdout,"     H. HELP\n\n\n");
+ 		
+-		getstr("Enter number (or [RETURN] to exit)",lin2);
++		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+ 		if( *lin2 == EOS) {
+                         if(dnaflag) {
+                                 dna_pw_go_penalty     = pw_go_penalty;
+@@ -1029,7 +1029,7 @@
+                 fprintf(stdout,"     8. Protein Gap Parameters\n\n");
+ 		fprintf(stdout,"     H. HELP\n\n\n");		
+ 
+-		getstr("Enter number (or [RETURN] to exit)",lin2);
++		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+ 
+ 		if(*lin2 == EOS) {
+ 			if(dnaflag) {
+@@ -1122,7 +1122,7 @@
+ 		fprintf(stdout,"     5. Toggle End Gap Separation         :%s\n\n",(!use_endgaps) ? "OFF" : "ON");
+ 		fprintf(stdout,"     H. HELP\n\n\n");		
+ 
+-		getstr("Enter number (or [RETURN] to exit)",lin2);
++		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+ 
+ 		if(*lin2 == EOS) return;
+ 		
+@@ -1136,7 +1136,7 @@
+ 			case '3':
+ 				fprintf(stdout,"Hydrophilic Residues Currently: %s\n",hyd_residues);
+ 
+-				getstr("Enter residues (or [RETURN] to quit)",lin1);
++				getstr("Enter residues (or [RETURN] to quit)",MAXLINE+1,lin1);
+                                 if (*lin1 != EOS) {
+                                         for (i=0;i<strlen(hyd_residues) && i<26;i++) {
+                                         c = lin1[i];
+@@ -1188,7 +1188,7 @@
+                 fprintf(stdout,"--\n");
+ 
+ 
+-                getstr("\n\nEnter number (or [RETURN] to exit)",lin2);
++                getstr("\n\nEnter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+                 if(*lin2 == EOS) return(matn);
+ 
+                 i=toupper(*lin2)-'0';
+@@ -1223,7 +1223,7 @@
+ 	fprintf(stdout,"\n%s\n",title);
+ 	strcpy(line,prompt);
+ 	strcat(line, "(y/n) ? [y]");
+-	getstr(line,lin2);
++	getstr(line,MAXLINE+1,lin2);
+ 	if ((*lin2 != 'n') && (*lin2 != 'N'))
+ 		return('y');
+ 	else
--- clustalx-1.83.orig/debian/patches/clustal-help.patch
+++ clustalx-1.83/debian/patches/clustal-help.patch
@@ -0,0 +1,26 @@
+Index: clustalw-1.83/clustalw.c
+===================================================================
+--- clustalw-1.83.orig/clustalw.c
++++ clustalw-1.83/clustalw.c
+@@ -34,7 +34,7 @@
+ #ifdef MSDOS
+         char *help_file_name = "clustalw.hlp";
+ #else
+-        char *help_file_name = "clustalw_help";
++        char *help_file_name = "/usr/share/clustalw/clustalw_help";
+ #endif
+ 
+ sint max_names; /* maximum length of names in current alignment file */
+Index: clustalw-1.83/clustalx.c
+===================================================================
+--- clustalw-1.83.orig/clustalx.c
++++ clustalw-1.83/clustalx.c
+@@ -26,7 +26,7 @@
+ #ifdef MSDOS
+         char *help_file_name = "clustalx.hlp";
+ #else
+-        char *help_file_name = "clustalx_help";
++        char *help_file_name = "/usr/share/clustalw/clustalx_help";
+ #endif
+ 
+ sint max_names; /* maximum length of names in current alignment file */
--- clustalx-1.83.orig/debian/patches/util.c.patch
+++ clustalx-1.83/debian/patches/util.c.patch
@@ -0,0 +1,52 @@
+Index: clustalw-1.83/util.c
+===================================================================
+--- clustalw-1.83.orig/util.c
++++ clustalw-1.83/util.c
+@@ -171,10 +171,18 @@
+ 	return str;
+ }
+ 
+-void getstr(char *instr,char *outstr)
++void getstr(char *instr, int n, char *outstr)
+ {	
++	int sl;
+ 	fprintf(stdout,"%s: ",instr);
+-	gets(outstr);
++	/* Read at most n-1 chars into outstr; on EOF or read error yield "" (not stale data). */
++	if(fgets(outstr,n,stdin)==NULL) outstr[0]=0;
++	/* Strip the trailing newline for compatibility with the previously used
++	 * (insecure) gets(), which never stored it. */
++	sl=strlen(outstr);
++	if(sl>0 && '\n'==outstr[sl-1]) {
++		outstr[sl-1]=0;
++	}
+ }
+ 
+ double getreal(char *instr,double minx,double maxx,double def)
+@@ -185,7 +193,7 @@
+ 	
+ 	while(TRUE) {
+ 		fprintf(stdout,"%s (%.1f-%.1f)   [%.1f]: ",instr,minx,maxx,def);
+-		gets(line);
++		fgets(line,MAXLINE,stdin);
+ 		status=sscanf(line,"%f",&ret);
+ 		if(status == EOF) return def;
+ 		if(ret>maxx) {
+@@ -210,7 +218,7 @@
+ 	while(TRUE) {
+ 		fprintf(stdout,"%s (%d..%d)    [%d]: ",
+ 		instr,(pint)minx,(pint)maxx,(pint)def);
+-		gets(line);
++		fgets(line,MAXLINE,stdin);
+ 		status=sscanf(line,"%d",&ret);
+ 		if(status == EOF) return def;
+ 		if(ret>maxx) {
+@@ -230,7 +238,7 @@
+ {
+ 	char line[MAXLINE];
+ 	
+-	getstr("\n\nEnter system command",line);
++	getstr("\n\nEnter system command",MAXLINE,line);
+ 	if(*line != EOS)
+ 		system(line);
+ 	fprintf(stdout,"\n\n");
--- clustalx-1.83.orig/debian/patches/makefile.patch
+++ clustalx-1.83/debian/patches/makefile.patch
@@ -0,0 +1,101 @@
+Index: clustalw-1.83/makefile
+===================================================================
+--- clustalw-1.83.orig/makefile
++++ clustalw-1.83/makefile
+@@ -1,7 +1,15 @@
+-install: clustalx clustalw
+ 
+-clean:
+-	rm *.o
++RM=/bin/rm -f
++
++BINDIR=$(DESTDIR)/usr/bin
++XBINDIR=$(DESTDIR)/usr/X11R6/bin
++DOCDIR=$(DESTDIR)/usr/share/doc/clustalw
++XDOCDIR=$(DESTDIR)/usr/share/doc/clustalx
++LIBDIR=$(DESTDIR)/usr/share/clustalw
++MANDIR=$(DESTDIR)/usr/share/man/man1
++XMANDIR=$(DESTDIR)/usr/X11R6/man/man1
++DOCS=clustalv.doc clustalw.doc clustalw.ms README_W
++XDOCS=README_X clustalx.html
+ 
+ OBJECTS = interface.o sequence.o showpair.o malign.o \
+   	util.o trees.o gcgcheck.o prfalign.o pairalign.o \
+@@ -12,25 +20,36 @@
+ 
+ HEADERS = general.h clustalw.h
+ 
+-CC	= cc
+-CFLAGS  = -c -O
++CC	= gcc
++CFLAGS  = -c -O2
++
++MACHINE=$(shell uname -m)
++ifeq ("$(MACHINE)","alpha")
++ # -mieee is for the Alpha only: ClustalW divides by zero (yes, I know it's bad)
++ # and expects the processor to carry on. -mieee tells the Alpha to comply with
++ # the IEEE standard and to shut up about divisions by zero.
++ CFLAGS  +=  -mieee
++endif
++
+ LFLAGS	= -O -lm 
+-NCBI_INC  = /dec/biolo/ncbi/include
+-NCBI_LIB	= /dec/biolo/ncbi/lib
+-CXFLAGS  = -DWIN_MOTIF -I$(NCBI_INC)
+-LXFLAGS	= -L$(NCBI_LIB) -lvibrant -lncbi -lpthread -lXm -lXmu -lXt -lX11 -lm 
++NCBI_INC= /usr/include/ncbi
++NCBI_LIB= /usr/lib
++CXFLAGS	= -DWIN_MOTIF -I$(NCBI_INC)
++LXFLAGS	= -L/usr/X11R6/lib -lvibrant -lncbi -lpthread -lXm -lXmu -lXt -lX11 -lm
+ 
+-clustalw : $(OBJECTS) amenu.o clustalw.o
+-	$(CC) -o $@ $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
++all: clustalx clustalw
+ 
+-interface.o : interface.c $(HEADERS) param.h
+-	$(CC) $(CFLAGS) $*.c
++machine:
++	echo $(MACHINE)
+ 
+-amenu.o : amenu.c $(HEADERS) param.h
+-	$(CC) $(CFLAGS) $*.c
++clustalw : $(OBJECTS) $(XOBJECTS) amenu.o clustalw.o
++	$(CC) -o $@ -I$(NCBI_INC) $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
+ 
+ clustalx : $(OBJECTS) $(XOBJECTS) clustalx.o
+-	$(CC) -o $@ $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
++	$(CC) -o $@ -I$(NCBI_INC) $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
++
++clustalw.o : clustalw.c $(HEADERS)
++	$(CC) $(CFLAGS) $*.c
+ 
+ clustalx.o : clustalx.c $(HEADERS)
+ 	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+@@ -56,6 +75,25 @@
+ trees.o : trees.c $(HEADERS) dayhoff.h
+ 	$(CC) $(CFLAGS) $*.c
+ 
+-.c.o :
+-	$(CC) $(CFLAGS) $?
++
++
++install: all
++	install -d $(BINDIR) $(XBINDIR) $(LIBDIR) $(DOCDIR)/examples $(MANDIR) $(XMANDIR) $(XDOCDIR)
++	install -m 0755 clustalw $(BINDIR)
++	install -m 0755 clustalx $(XBINDIR)
++	install -m 0644 clustalw_help clustalx_help $(LIBDIR)
++	install -m 0644 clustalw.1 $(MANDIR)
++	install -m 0644 clustalx.1 $(MANDIR)
++	install -m 0644 $(DOCS) $(DOCDIR)
++	install -m 0644 $(XDOCS) $(XDOCDIR)
++	cp -a -R tests.clustalw $(DOCDIR)/examples/tests
++
++.PHONY:	clean distclean
++
++clean:
++	$(RM) *.o
++
++distclean: clean
++	$(RM) clustalw clustalx
++	cd tests.clustalw; make clean
+ 
--- clustalx-1.83.orig/debian/patches/sequence.c.patch
+++ clustalx-1.83/debian/patches/sequence.c.patch
@@ -0,0 +1,13 @@
+Index: clustalw-1.83/sequence.c
+===================================================================
+--- clustalw-1.83.orig/sequence.c
++++ clustalw-1.83/sequence.c
+@@ -924,7 +924,7 @@
+ 	static Boolean dnaflag1;
+ 	
+ 	if(usemenu)
+-		getstr("Enter the name of the sequence file",line);
++		getstr("Enter the name of the sequence file",FILENAMELEN+1,line);
+ 	else
+ 		strcpy(line,seqname);
+ 	if(*line == EOS) return -1;
--- clustalx-1.83.orig/debian/patches/interface.c.patch
+++ clustalx-1.83/debian/patches/interface.c.patch
@@ -0,0 +1,226 @@
+Index: clustalw-1.83/interface.c
+===================================================================
+--- clustalw-1.83.orig/interface.c
++++ clustalw-1.83/interface.c
+@@ -1223,8 +1223,7 @@
+ 			while(fgets(temp,MAXLINE+1,help_file)) {
+ 				if(strstr(temp, help_marker)){
+ 				  	if(usemenu) {
+-						fprintf(stdout,"\n");
+-				    		getstr("Press [RETURN] to continue",lin2);
++				    		getstr("\nPress [RETURN] to continue",MAXLINE+1,lin2);
+ 				  	}
+ 					fclose(help_file);
+ 					return;
+@@ -1235,8 +1234,7 @@
+ 				}
+ 			       if(usemenu) {
+ 			          if(nlines >= PAGE_LEN) {
+-				     	   fprintf(stdout,"\n");
+-			 	  	   getstr("Press [RETURN] to continue or  X  to stop",lin2);
++			 	  	   getstr("\nPress [RETURN] to continue or  X  to stop",MAXLINE+1,lin2);
+ 				  	   if(toupper(*lin2) == 'X') {
+ 						   fclose(help_file);
+ 						   return;
+@@ -1247,8 +1245,7 @@
+ 			       }
+ 			}
+ 			if(usemenu) {
+-				fprintf(stdout,"\n");
+-				getstr("Press [RETURN] to continue",lin2);
++				getstr("\nPress [RETURN] to continue",MAXLINE+1,lin2);
+ 			}
+ 			fclose(help_file);
+ 		}
+@@ -1286,8 +1283,7 @@
+                 fputs(temp,stdout);
+                 ++nlines;
+                 if(nlines >= PAGE_LEN) {
+-                        fprintf(stdout,"\n");
+-                        getstr("Press [RETURN] to continue or  X  to stop",lin2);
++                        getstr("\nPress [RETURN] to continue or  X  to stop",MAXLINE+1,lin2);
+                         if(toupper(*lin2) == 'X') {
+                                 fclose(file);
+                                 return;
+@@ -1297,8 +1293,7 @@
+                 }
+         }
+         fclose(file);
+-        fprintf(stdout,"\n");
+-        getstr("Press [RETURN] to continue",lin2);
++        getstr("\nPress [RETURN] to continue",MAXLINE+1,lin2);
+ }
+ 
+ 
+@@ -1747,7 +1742,7 @@
+         FILE *infile;
+ 
+         if(usemenu)
+-                getstr("Enter name of the matrix file",lin2);
++                getstr("Enter name of the matrix file",MAXLINE+1,lin2);
+         else
+                 strcpy(lin2,str);
+ 
+@@ -1773,7 +1768,7 @@
+         FILE *infile;
+ 
+         if(usemenu)
+-                getstr("Enter name of the matrix file",lin2);
++                getstr("Enter name of the matrix file",MAXLINE+1,lin2);
+         else
+                 strcpy(lin2,str);
+ 
+@@ -2163,6 +2158,7 @@
+  
+ {	static char temp[FILENAMELEN+1];
+ 	static char local_prompt[MAXLINE];
++	static char local_prompt_tmp[MAXLINE+FILENAMELEN+1];
+ 	FILE * file_handle;
+ 
+ /*	if (*file_name == EOS) {
+@@ -2174,17 +2170,17 @@
+ 		warning("Output file name is the same as input file.");
+ 		if (usemenu) {
+ 			strcpy(local_prompt,"\n\nEnter new name to avoid overwriting ");
+-			strcat(local_prompt," [%s]: ");          
+-			fprintf(stdout,local_prompt,file_name);
+-			gets(temp);
++			strcat(local_prompt," [%s]");          
++			sprintf(local_prompt_tmp,local_prompt,file_name);
++			getstr(local_prompt_tmp,FILENAMELEN+1,temp);
+ 			if(*temp != EOS) strcpy(file_name,temp);
+ 		}
+ 	}
+ 	else if (usemenu) {
+ 		strcpy(local_prompt,prompt);
+-		strcat(local_prompt," [%s]: ");          
+-		fprintf(stdout,local_prompt,file_name);
+-		gets(temp);
++		strcat(local_prompt," [%s]");          
++		sprintf(local_prompt_tmp,local_prompt,file_name);
++		getstr(local_prompt_tmp,FILENAMELEN+1,temp);
+ 		if(*temp != EOS) strcpy(file_name,temp);
+ 	}
+ 
+@@ -2260,7 +2256,7 @@
+         	}
+         	else {
+                  	if((tree = open_output_file(
+-                	"\nEnter name for new GUIDE TREE           file  ",path,
++                	"\nEnter name for new GUIDE TREE file ",path,
+                 	phylip_name,"dnd")) == NULL) return;
+         	}
+ 	}
+@@ -2327,6 +2323,7 @@
+ { 
+ 	char path[FILENAMELEN+1];
+ 	char tree_name[FILENAMELEN+1],temp[MAXLINE+1];
++	char tmp_msg[MAXLINE+1+300];
+ 	Boolean use_tree;
+ 	FILE *tree;
+ 	sint i,j,count;
+@@ -2383,9 +2380,9 @@
+         	if((tree=fopen(tree_name,"r"))!=NULL) {
+ #endif
+ 		if (usemenu)
+-            	fprintf(stdout,"\nUse the existing GUIDE TREE file,  %s  (y/n) ? [y]: ",
++            	sprintf(tmp_msg,"\nUse the existing GUIDE TREE file,  %s  (y/n) ? [y]",
+                                            tree_name);
+-                gets(temp);
++                getstr(tmp_msg,MAXLINE+1,temp);
+                 if(*temp != 'n' && *temp != 'N') {
+                     strcpy(phylip_name,tree_name);
+                     use_tree = TRUE;
+@@ -2584,6 +2581,7 @@
+ void get_tree(char *phylip_name)
+ {
+ 	char path[FILENAMELEN+1],temp[MAXLINE+1];
++	char tmp_msg[FILENAMELEN+300];
+ 	sint count;
+ 	
+ 	if(empty) {
+@@ -2615,9 +2613,9 @@
+        			strcpy(phylip_name,path);
+        			strcat(phylip_name,"dnd");
+ 
+-            fprintf(stdout,"\nEnter a name for the guide tree file [%s]: ",
++			sprintf(tmp_msg,"\nEnter a name for the guide tree file [%s]",
+                                            phylip_name);
+-                	gets(temp);
++                	getstr(tmp_msg,MAXLINE+1,temp);
+                 	if(*temp != EOS)
+                         	strcpy(phylip_name,temp);
+         	}
+@@ -2685,6 +2683,8 @@
+ 	char path[FILENAMELEN+1];
+ 	char tree_name[FILENAMELEN+1];
+ 	char temp[MAXLINE+1];
++	char tmp_msg[FILENAMELEN+300];
++
+ 	Boolean use_tree1,use_tree2;
+ 	FILE *tree;
+ 	sint count,i,j,dscore;
+@@ -2717,9 +2717,9 @@
+ #else
+         	if((tree=fopen(tree_name,"r"))!=NULL) {
+ #endif
+-            	fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 1,  %s  (y/n) ? [y]: ",
++            	sprintf(tmp_msg,"\nUse the existing GUIDE TREE file for Profile 1,  %s  (y/n) ? [y]",
+                                            tree_name);
+-                gets(temp);
++                getstr(tmp_msg,MAXLINE+1,temp);
+                 if(*temp != 'n' && *temp != 'N') {
+                     strcpy(p1_tree_name,tree_name);
+                     use_tree1 = TRUE;
+@@ -2739,19 +2739,20 @@
+ 			strcpy(tree_name,path);
+ 			strcat(tree_name,"dnd");
+ #ifdef VMS
+-        	if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL) {
++			if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL)
+ #else
+-        	if((tree=fopen(tree_name,"r"))!=NULL) {
++			if((tree=fopen(tree_name,"r"))!=NULL)
+ #endif
+-            	fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 2,  %s  (y/n) ? [y]: ",
+-                                           tree_name);
+-                gets(temp);
+-                if(*temp != 'n' && *temp != 'N') {
+-                    strcpy(p2_tree_name,tree_name);
+-                    use_tree2 = TRUE;
+-                }
+-                fclose(tree);
+-        	}
++			{
++				sprintf(tmp_msg,"\nUse the existing GUIDE TREE file for Profile 2,  %s  (y/n) ? [y]",
++						   tree_name);
++				getstr(tmp_msg,MAXLINE+1,temp);
++				if(*temp != 'n' && *temp != 'N') {
++				    strcpy(p2_tree_name,tree_name);
++				    use_tree2 = TRUE;
++				}
++				fclose(tree);
++			}
+ 		}
+ 		else if (!usemenu && use_tree2_file) {
+ 			use_tree2 = TRUE;
+@@ -4194,6 +4195,7 @@
+ {
+   char parname[FILENAMELEN+1], temp[FILENAMELEN+1];
+   char path[FILENAMELEN+1];
++  char tmp_msg[FILENAMELEN+300];
+   FILE *parout;
+   
+   get_path(seqname,path);
+@@ -4201,9 +4203,9 @@
+   strcat(parname,"par");
+   
+   if(usemenu) {
+-    fprintf(stdout,"\nEnter a name for the parameter output file [%s]: ",
++    sprintf(tmp_msg,"\nEnter a name for the parameter output file [%s]",
+ 	    parname);
+-    gets(temp);
++    getstr(tmp_msg,FILENAMELEN+1,temp);
+     if(*temp != EOS)
+       strcpy(parname,temp);
+   }
--- clustalx-1.83.orig/debian/clustalx.manpages
+++ clustalx-1.83/debian/clustalx.manpages
@@ -0,0 +1 @@
+debian/clustalx.1
--- clustalx-1.83.orig/debian/changelog
+++ clustalx-1.83/debian/changelog
@@ -0,0 +1,201 @@
+clustalx (1.83-4) unstable; urgency=low
+
+  [ Charles Plessy ]
+  * Transient package while Clustal X version > 2.0 is being relicenced.
+    This package is the same as clustalw-1.83-3 except that it only builds
+    the clustalx binary package (plus some minor improvements described in
+    this changelog). In parallel, the Debian clustalw source package will
+    stop building the clustalx binary package.
+  * Association between Clustal X and .aln files:
+    - text/x-clustalw-alignment associated to clustalx in clustalx.desktop.
+    - .aln declared as text/x-clustalw-alignment in clustalx.sharedmimeinfo.
+    - text/x-clustalw-alignment associated to clustalx in clustalx.mime.
+    - debian/rules calls dh_installmime.
+  * debian/control:
+    - Allowed upload by Debian Maintainers.
+    - Added Homepage: field.
+    - Removed clustalw.
+    - Checked conformance with Policy 3.7.3.
+
+  [ Steffen Moeller ]
+  * Updated watch file.
+  * added German translation to desktop file
+
+ -- Charles Plessy <charles-debian-nospam@plessy.org>  Thu, 21 Feb 2008 13:40:20 +0900
+
+clustalw (1.83-3) unstable; urgency=low
+
+  [ Nelson A. de Oliveira ]
+  * Added watch file, debian/rules get-orig-source
+    should be used to download the orig.tar.gz, not uscan.
+
+  [ Charles Plessy ]
+  * Build-depend on libncbi6-dev instead of ncbi-tools6-dev.
+
+  [ Steffen Moeller ]
+  * Added dependency to later version lesstif2 (Closes:#429480).
+  * The recompilation against libvibrant6 was probably
+    already fixed in an earlier version, certainly it is now
+    (Closes:#378483).
+
+ -- Charles Plessy <charles-debian-nospam@plessy.org>  Wed, 12 Sep 2007 16:50:37 +0900
+
+clustalw (1.83-2) unstable; urgency=low
+
+  [ Steffen Moeller ]
+  * Help now works in ClustalX (Closes:#215414).
+  * debian/control:
+    - removed [biology] tag and improved debtags
+    - better formatting/wording
+    - suggesting alignment pretty-printers
+
+  [ Charles Plessy ]
+  * Add Subversion repository URL to debian/control.
+  * FreeDesktop menu added (Closes: #380715)
+  * Acknowledge NMU, thanks to Kai and Fabio. (Closes: #374241, #359712)
+  * Collaborative maintenance: the maintainer is a mailing list, Steffen
+    Moëller and Charles Plessy are uploaders.
+  * Upgraded to debhelper 5.
+  * Modified the copyright file to mention that it is not disallowed to use
+    autobuilders to create binary packages.
+  * Debian Menu transition: Apps/Science becomes Applications/Science/Biology.
+  * Fixed a typo in clustalw.menu (Closes: #428518)
+  * Updated Steffen's email address.
+  * Using quilt to manage the changes to the sources.
+
+ -- Charles Plessy <charles-debian-nospam@plessy.org>  Sun, 12 Aug 2007 23:06:39 +0900
+
+clustalw (1.83-1.2) unstable; urgency=high
+
+  * Non-maintainer upload.
+  * No changes, let's try to trigger again the builders.
+
+ -- Fabio Tranchitella <kobold@debian.org>  Fri, 10 Nov 2006 09:16:45 +0100
+
+clustalw (1.83-1.1) unstable; urgency=low 
+
+  * Non-maintainer upload.
+  * lesstif1 is deprecated, transition to lesstif2 (Closes: #374241)
+  * Dead link in the package description (Closes: #359712)
+  * Updated standards version, binary-indep rule required
+
+ -- Kai Hendry <hendry@iki.fi>  Mon, 10 Jul 2006 11:04:05 +0900
+
+clustalw (1.83-1) unstable; urgency=low
+
+  * New upstream version
+  * New maintainer
+  * Reapplied patches from 1.82-3
+  * Removed calls to insecure gets
+  * Cosmetics to makefile
+  * Extension of description
+  * Standards-Version: 3.5.10
+  * Increased version of debhelper: Build-Depends: debhelper (>= 4)
+  * Used debian/compat for debhelper version
+  * Replaces *.files by *.install, *.docs, *.manpages and used the
+    appropriate tools in debian/rules
+  * Moved clustalx to /usr/bin because it does not *belong* to the
+    X11 system
+  * Moved the manpage clustalx.1x to clustalx.1 accordingly.
+  * Removed debian/dirs
+  * Added menu entries
+
+ -- Steffen Moeller <moeller@pzr.uni-rostock.de>  Mon, 23 Jun 2003 19:26:29 +0100
+
+clustalw (1.82-3) unstable; urgency=low
+
+  * "My-wifes-Katrin-Birthday"-release (even if I doubt that she is very
+    interested in Debian packaging details)
+  * Fix a buffer override
+    closes: #135255
+
+ -- Andreas Tille <tille@debian.org>  Wed, 27 Feb 2002 07:26:29 +0100
+
+clustalw (1.82-2) unstable; urgency=low
+
+  * also compile GUI clustalx which is included in the sources now
+
+ -- Andreas Tille <tille@debian.org>  Sat,  3 Nov 2001 14:41:50 +0100
+
+clustalw (1.82-1) unstable; urgency=low
+
+  * New maintainer
+    closes: #100213
+  * New upstream version
+    closes: #40904
+  * Standards-Version: 3.5.6
+  * Moved help file to /usr/share/clustalw/clustalw_help because it
+    is architecture independent
+  * Added URL to the package description because I consider this as
+    "good style" to have an upstream link without installing the package
+  * Fixed a typo in the examples Makefile which was shipped with the
+    Debian package:
+     clustalw -profile -profile1=nuc.aln -profile2=nuc2.aln -outfile=profile.aln
+                                                      ^ this 2 was missing
+
+ -- Andreas Tille <tille@debian.org>  Mon, 29 Oct 2001 21:40:20 +0100
+
+clustalw (1.7-9) unstable; urgency=low
+
+  * Maintainer set to Debian QA Group <packages@qa.debian.org>.
+
+ -- Adrian Bunk <bunk@fs.tum.de>  Fri, 24 Aug 2001 21:09:56 +0200
+
+clustalw (1.7-8) unstable; urgency=low
+
+  * Adopted by new maintainer; closes: #92790 
+  * Updated to latest standards version and added Build-Depends (changed
+    makefile, control, dirs, docs, and rules); closes: #91132, #91411
+  * Deleted empty README.Debian file.
+  * Corrected doc path in manpage.
+  * Moved package to section non-free/science, because it is a tool that
+    is exclusively useful for molecular biologists.
+
+ -- Dr. Guenter Bechly <gbechly@debian.org>  Fri, 20 Apr 2001 19:08:00 +0200
+
+clustalw (1.7-7) unstable; urgency=low
+
+  * Better extended description. Closes #31475.
+  * Manual page. Closes #31382.
+
+ -- Stephane Bortzmeyer <bortzmeyer@debian.org>  Wed, 31 Mar 1999 10:50:02 +0200
+
+clustalw (1.7-6) unstable; urgency=low
+
+  * Yet another attempt to get past rejection :-) Licence is now included
+    in the copyright file.
+
+ -- Stephane Bortzmeyer <bortzmeyer@debian.org>  Sat, 19 Dec 1998 15:29:40 +0100
+
+clustalw (1.7-5) unstable; urgency=low
+
+  * A LICENCE is now included, non-free unfortunately.
+
+ -- Stephane Bortzmeyer <bortzmeyer@debian.org>  Thu, 17 Dec 1998 14:48:43 +0100
+
+clustalw (1.7-4) unstable; urgency=low
+
+  * Compiles with -mieee on the Alpha: otherwise, floating exception since 
+    ClustalW divides by zero (yuck!)
+
+ -- Stephane Bortzmeyer <bortzmeyer@debian.org>  Wed,  2 Dec 1998 16:16:02 +0100
+
+clustalw (1.7-3) unstable; urgency=low
+
+  * Tests and samples added
+  * On line help file added 
+
+ -- Stephane Bortzmeyer <bortzmeyer@debian.org>  Tue,  1 Dec 1998 15:57:50 +0100
+
+clustalw (1.7-2) unstable; urgency=low
+
+  * Switch to dh_make
+  * First public release
+
+ -- Stephane Bortzmeyer <bortzmeyer@debian.org>  Tue,  1 Dec 1998 15:25:13 +0100
+
+clustalw (1.7-1) unstable; urgency=low
+
+  * Initial Release.
+
+ -- Stephane Bortzmeyer <bortzmeyer@pasteur.fr>  Fri, 28 Aug 1998 16:09:48 +0200
--- clustalx-1.83.orig/debian/copyright
+++ clustalx-1.83/debian/copyright
@@ -0,0 +1,76 @@
+This package was debianized by Andreas Tille <tille@debian.org> on
+Sat, 27 Oct 2001 22:16:53 +0200
+
+It was downloaded from:
+
+       ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/  and
+       ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalW/
+
+while the source was merged to one common upstream source (see README.Debian)
+
+Authors: 
+Toby Gibson <Toby.Gibson@EMBL-Heidelberg.de>
+Julie Thompson <julie@titus.u-strasbg.fr>
+Des Higgins <d.higgins@ucc.ie>
+
+Copyright:
+
+Non-free. You cannot distribute it at will.
+
+Debian holds a special exemption for distributing (see below). The licence does
+not forbid Debian from using autobuilders to create binary packages.
+
+Licence included here:
+
+**********************
+LICENCE FOR CLUSTAL W
+**********************
+
+Clustal W (hereafter "the program") is copyright (c) 1994-1998 by Julie D.
+Thompson, Desmond G. Higgins and Toby J. Gibson.
+
+Permission is granted to copy, distribute and use the program provided no fee
+is charged for it and provided that this copyright and licence notice is not
+removed or altered.
+
+The full source code of the program is provided free. You should not
+distribute a modified version of the program without obtaining the permission
+of the authors. You must keep the original copyright and licence notice. You
+must also document clearly the modifications you have made. You must make
+clear that this is not the original version.
+
+Commercial distributors of Clustal W are requested to contact the Clustal W
+authors in order to take out a non-exclusive licence. See the README file
+included with Clustal W for a rationale.
+
+You should understand that this software is provided as-is. The authors make
+no claims towards its suitability for any purpose and accept absolutely no
+liability for any damages the program may cause. Use at your own risk.
+
+* End of licence
+
+
+
+Special authorization for Debian:
+
+
+
+From: "Toby Gibson" <Toby.Gibson@EMBL-Heidelberg.de>
+Date: Thu, 17 Dec 1998 14:37:02 +0100
+To: Stephane Bortzmeyer <bortzmeyer@debian.org>
+Subject: Re: Fwd: clustalw_1.7-4_i386.changes REJECTED
+
+Hi Stephane,
+
+Now that we have thought about it, I don't think we can meet your stricter free
+criterion. There are already several companies who bundle Clustal W in sequence
+analysis packages and so are effectively selling it. They have paid for
+non-exclusive licences even though anyone can get the program for free: but
+they must have a multiple alignment engine, so we might as well earn some money
+which we can put toward further development.
+
+I think the main thing is to allow the distribution at all by Debian. We seem
+to have reached this point.
+
+Please do include this licence in the Debian package and I hope the release can
+go smoothly from now on.
--- clustalx-1.83.orig/debian/clustalx.docs
+++ clustalx-1.83/debian/clustalx.docs
@@ -0,0 +1,2 @@
+README_X
+clustalx.html
--- clustalx-1.83.orig/debian/watch
+++ clustalx-1.83/debian/watch
@@ -0,0 +1,2 @@
+version=3
+ftp://ftp.ebi.ac.uk/pub/software/clustalw2/clustalx-(.*)-src\.tar\.gz
--- clustalx-1.83.orig/debian/clustalx.dirs
+++ clustalx-1.83/debian/clustalx.dirs
@@ -0,0 +1 @@
+usr/share/applications
--- clustalx-1.83.orig/debian/rules
+++ clustalx-1.83/debian/rules
@@ -0,0 +1,64 @@
+#!/usr/bin/make -f
+# debian/rules: builds the clustalx binary package with debhelper.
+# The upstream sources are patched through quilt; the patch and unpatch
+# targets used below are expected to be provided by the included quilt.make.
+
+include /usr/share/quilt/quilt.make
+
+# Apply the patch series, then run the upstream build once; build-stamp
+# records that the build has been done so it is not repeated.
+build: patch build-stamp
+build-stamp:
+	dh_testdir
+	$(MAKE)
+	touch build-stamp
+
+# Revert the patches and remove build products.  distclean is only run
+# when a Makefile is present in the tree.
+clean: unpatch
+	dh_testdir
+	dh_testroot
+	[ ! -f Makefile ] || $(MAKE) distclean
+	dh_clean build-stamp
+
+# Populate the package staging tree via dh_installdirs and dh_install.
+install: build
+	dh_testdir
+	dh_testroot
+	dh_clean -k
+	dh_installdirs
+	dh_install
+
+# Assemble the architecture-dependent .deb.
+binary-arch: build install
+	dh_testdir
+	dh_testroot
+	dh_installdocs
+	dh_installexamples
+	dh_installmenu
+	dh_desktop
+	dh_installmime
+	dh_installman
+	dh_installchangelogs
+	dh_strip
+	dh_link
+	dh_compress
+	dh_fixperms
+	dh_installdeb
+	dh_shlibdeps
+	dh_gencontrol
+	dh_md5sums
+	dh_builddeb
+
+# Fetch the upstream tarball and write a copy with the clustalx and clustalw
+# archive members deleted.  The output goes to .. when run from a directory
+# containing debian/, otherwise to the current directory.
+# NOTE(review): the output is named clustalw_1.83.orig.tar.gz although the
+# source package is clustalx -- confirm the intended name.
+get-orig-source:
+	destdir=.;if [ -d debian ]; then destdir=..; fi; \
+	lynx --dump ftp://ftp.ebi.ac.uk/pub/software/unix/clustalx/clustalx1.83.sun.tar.gz | tar --delete clustalx1.83.sun/clustalx clustalx1.83.sun/clustalw  -f - | gzip -c > $$destdir/clustalw_1.83.orig.tar.gz
+
+binary: binary-arch
+binary-indep: # does nothing
+.PHONY: build clean binary-arch binary-indep binary install get-orig-source
--- clustalx-1.83.orig/debian/clustalx.mime
+++ clustalx-1.83/debian/clustalx.mime
@@ -0,0 +1 @@
+text/x-clustalw-alignment; clustalx '%s'; description=Clustal W multiple sequence alignment; nametemplate=%s.aln
--- clustalx-1.83.orig/debian/compat
+++ clustalx-1.83/debian/compat
@@ -0,0 +1 @@
+5
--- clustalx-1.83.orig/debian/clustalx.sharedmimeinfo
+++ clustalx-1.83/debian/clustalx.sharedmimeinfo
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<mime-info xmlns='http://www.freedesktop.org/standards/shared-mime-info'>
+  <mime-type type="text/x-clustalw-alignment">
+    <sub-class-of type="text/plain"/>
+    <comment>Multiple sequence alignment in Clustal W format</comment>
+    <comment xml:lang="fr">Alignement multiple de séquences au format Clustal W</comment>
+    <magic priority="50">
+      <match type="string" offset="0" value="CLUSTAL"/>
+    </magic>
+    <glob pattern="*.aln"/>
+  </mime-type>
+</mime-info>
--- clustalx-1.83.orig/debian/clustalx.desktop
+++ clustalx-1.83/debian/clustalx.desktop
@@ -0,0 +1,19 @@
+[Desktop Entry]
+Version=1.0
+Terminal=false
+Icon=clustalx
+Exec=clustalx
+Name=Clustal X
+GenericName=Alignment and phylogeny
+GenericName[de]=Alignment und Phylogenie
+GenericName[en]=Alignment and phylogeny
+GenericName[fr]=Alignement et phylogénèse
+GenericName[pt_BR]=Alinhamento e filogenia
+Comment=Global multiple nucleotide or peptide sequence alignment and phylogenetic analysis
+Comment[de]=Globales multiples Nucleotid- oder Peptid-Sequenzalignment und phylogenetische Analyse
+Comment[en]=Global multiple nucleotide or peptide sequence alignment and phylogenetic analysis
+Comment[fr]=Alignement global de séquences peptidiques ou nucléotidiques et analyse phylogénétique
+Comment[pt_BR]=Alinhamento global de sequências peptídicas ou nucleotídeos e análise filogenética
+Type=Application
+Categories=Biology;Science;Education;
+MimeType=text/x-clustalw-alignment;
--- clustalx-1.83.orig/debian/clustalx.install
+++ clustalx-1.83/debian/clustalx.install
@@ -0,0 +1,4 @@
+clustalx		usr/bin
+clustalx_help		usr/share/clustalw
+debian/clustalx.desktop	usr/share/applications
+