<?xml version='1.0'?>
<!DOCTYPE art SYSTEM 'http://www.biomedcentral.com/xml/article.dtd'>
<art>
   <ui>gb-2007-8-4-r52</ui>
   <ji>GBJ</ji>
   <fm>
      <dochead>Research</dochead>
      <bibl>
         <title>
            <p>Assembly of the <it>Candida albicans </it>genome into sixteen supercontigs aligned on the eight chromosomes</p>
         </title>
         <aug>
            <au id="A1" ce="yes">
               <snm>van het Hoog</snm>
               <fnm>Marco</fnm>
               <insr iid="I1"/>
               <email>marco@vanhethoog.com</email>
            </au>
            <au id="A2" ce="yes">
               <snm>Rast</snm>
               <mi>J</mi>
               <fnm>Timothy</fnm>
               <insr iid="I2"/>
               <email>rastx001@umn.edu</email>
            </au>
            <au id="A3">
               <snm>Martchenko</snm>
               <fnm>Mikhail</fnm>
               <insr iid="I1"/>
               <email>mikhail.martchenko@cnrc-nrc.gc.ca</email>
            </au>
            <au id="A4">
               <snm>Grindle</snm>
               <fnm>Suzanne</fnm>
               <insr iid="I2"/>
               <email>grind001@umn.edu</email>
            </au>
            <au id="A5">
               <snm>Dignard</snm>
               <fnm>Daniel</fnm>
               <insr iid="I1"/>
               <email>daniel.dignard@cnrc-nrc.gc.ca</email>
            </au>
            <au id="A6">
               <snm>Hogues</snm>
               <fnm>Herv&#233;</fnm>
               <insr iid="I1"/>
               <email>herve@bri.nrc.ca</email>
            </au>
            <au id="A7">
               <snm>Cuomo</snm>
               <fnm>Christine</fnm>
               <insr iid="I3"/>
               <email>cuomo@broad.mit.edu</email>
            </au>
            <au id="A8">
               <snm>Berriman</snm>
               <fnm>Matthew</fnm>
               <insr iid="I4"/>
               <email>mb4@sanger.ac.uk</email>
            </au>
            <au id="A9">
               <snm>Scherer</snm>
               <fnm>Stewart</fnm>
               <insr iid="I5"/>
               <email>stew@sscherer.com</email>
            </au>
            <au id="A10">
               <snm>Magee</snm>
               <fnm>BB</fnm>
               <insr iid="I2"/>
               <email>magee002@umn.edu</email>
            </au>
            <au id="A11">
               <snm>Whiteway</snm>
               <fnm>Malcolm</fnm>
               <insr iid="I1"/>
               <email>Malcolm.Whiteway@cnrc-nrc.gc.ca</email>
            </au>
            <au id="A12">
               <snm>Chibana</snm>
               <fnm>Hiroji</fnm>
               <insr iid="I6"/>
               <email>chibana@myco.pf.chiba-u.ac.jp</email>
            </au>
            <au id="A13">
               <snm>Nantel</snm>
               <fnm>Andr&#233;</fnm>
               <insr iid="I1"/>
               <email>andre.nantel@nrc-cnrc.gc.ca</email>
            </au>
            <au id="A14" ca="yes">
               <snm>Magee</snm>
               <fnm>PT</fnm>
               <insr iid="I2"/>
               <email>magee@umn.edu</email>
            </au>
         </aug>
         <insg>
            <ins id="I1">
               <p>Biotechnology Research Institute, National Research Council of Canada, Montreal, Quebec, H4P 2R2, Canada</p>
            </ins>
            <ins id="I2">
               <p>University of Minnesota, Minneapolis, MN, 55455, USA</p>
            </ins>
            <ins id="I3">
               <p>Broad Institute of MIT and Harvard, Cambridge, MA, USA</p>
            </ins>
            <ins id="I4">
               <p>Wellcome Trust Sanger Institute, Hinxton, CB10 1SA, UK</p>
            </ins>
            <ins id="I5">
               <p>Paseo Grande, Moraga, CA 94556, USA</p>
            </ins>
            <ins id="I6">
               <p>Research Center for Pathogenic Fungi and Microbial Toxicoses, Chiba University, Chiba, 260-8673, Japan</p>
            </ins>
         </insg>
         <source>Genome Biology</source>
         <issn>1465-6906</issn>
         <pubdate>2007</pubdate>
         <volume>8</volume>
         <issue>4</issue>
         <fpage>R52</fpage>
         <url>http://genomebiology.com/2007/8/4/R52</url>
         <xrefbib>
            <pubidlist>
               <pubid idtype="pmpid">17419877</pubid>
               <pubid idtype="doi">10.1186/gb-2007-8-4-r52</pubid>
            </pubidlist>
         </xrefbib>
      </bibl>
      <history>
         <rec>
            <date>
               <day>6</day>
               <month>10</month>
               <year>2006</year>
            </date>
         </rec>
         <revrec>
            <date>
               <day>28</day>
               <month>2</month>
               <year>2007</year>
            </date>
         </revrec>
         <acc>
            <date>
               <day>9</day>
               <month>4</month>
               <year>2007</year>
            </date>
         </acc>
         <pub>
            <date>
               <day>09</day>
               <month>04</month>
               <year>2007</year>
            </date>
         </pub>
      </history>
      <cpyrt>
         <year>2007</year>
         <collab>van het Hoog et al.; licensee BioMed Central Ltd.</collab>
         <note>This is an open access article distributed under the terms of the Creative Commons Attribution License (<url>http://creativecommons.org/licenses/by/2.0</url>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</note>
      </cpyrt>
      <shorttitle>
         <p>New <it>Candida albicans </it>genome assembly</p>
      </shorttitle>
      <shortabs>
         <p>For Assembly 20 of the <it>Candida albicans </it>genome, the sequence of each of the eight chromosomes was determined, revealing new insights into gene family creation and dispersion, subtelomere organization, and chromosome evolution.</p>
      </shortabs>
      <abs>
         <sec>
            <st>
               <p>Abstract</p>
            </st>
            <sec>
               <st>
                  <p>Background</p>
               </st>
               <p>The 10.9&#215; genomic sequence of <it>Candida albicans</it>, the most important human fungal pathogen, was published in 2004. Assembly 19 consisted of 412 supercontigs, of which 266 were a haploid set, since this fungus is diploid and contains an extensive degree of heterozygosity but lacks a complete sexual cycle. However, sequences of specific chromosomes were not determined.</p>
            </sec>
            <sec>
               <st>
                  <p>Results</p>
               </st>
               <p>Supercontigs from Assembly 19 (183, representing 98.4% of the sequence) were assigned to individual chromosomes purified by pulse-field gel electrophoresis and hybridized to DNA microarrays. Nine Assembly 19 supercontigs were found to contain markers from two different chromosomes. Assembly 21 contains the sequence of each of the eight chromosomes and was determined using a synteny analysis with preliminary versions of the <it>Candida dubliniensis </it>genome assembly, bioinformatics, a sequence tagged site (STS) map of overlapping fosmid clones, and an optical map. The orientation and order of the contigs on each chromosome, repeat regions too large to be covered by a sequence run, such as the ribosomal DNA cluster and the major repeat sequence, and telomere placement were determined using the STS map. Sequence gaps were closed by PCR and sequencing of the products. The overall assembly was compared to an optical map; this identified some misassembled contigs and gave a size estimate for each chromosome.</p>
            </sec>
            <sec>
               <st>
                  <p>Conclusion</p>
               </st>
               <p>Assembly 21 reveals an ancient chromosome fusion, a number of small internal duplications followed by inversions, and a subtelomeric arrangement, including a new gene family, the <it>TLO </it>genes. Correlations of position with relatedness of gene families imply a novel method of dispersion. The sequence of the individual chromosomes of <it>C. albicans </it>raises interesting biological questions about gene family creation and dispersion, subtelomere organization, and chromosome evolution.</p>
            </sec>
         </sec>
      </abs>
   </fm>
   <meta>
      <classifications>
         <classification type="BMC" subtype="man_spc_id" id="30010014">Microbiology and parasitology</classification>
         <classification type="BMC" subtype="man_spc_id" id="30010009">Genetics</classification>
         <classification type="BMC" subtype="man_spc_id" id="30010010">Genome studies</classification>
      </classifications>
   </meta>
   <bdy>
      <sec>
         <st>
            <p>Background</p>
         </st>
         <p>In the past 25 years, the opportunistic human pathogen <it>Candida albicans </it>has become a serious medical problem. This fungus is now fourth on the list of hospital-acquired infections, ahead of Gram-negative bacteria, and despite the recent introduction of a new class of antifungals, drug resistance continues to be a problem <abbrgrp><abbr bid="B1">1</abbr></abbrgrp>. In the past 15 years, molecular techniques have been applied to understand the pathogenesis of this organism as well as to search for novel drug targets. However, <it>C. albicans </it>presents several difficulties for molecular biologists: it is diploid; only a part of a sexual cycle has been demonstrated; it has a very plastic genome; and it is highly heterozygous. Each of these properties is best investigated through a genomic approach. Hence, knowledge of the genome sequence has been an important goal for the past 10 years. More recently, genome structure and dynamics have become increasingly important in this organism as widespread aneuploidy <abbrgrp><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr></abbrgrp>, the role of repeated DNA in chromosome loss <abbrgrp><abbr bid="B4">4</abbr></abbrgrp>, and chromosome rearrangement leading to drug resistance <abbrgrp><abbr bid="B5">5</abbr></abbrgrp> have been reported.</p>
         <p>The Candida Genome Sequencing Project started in 1996 and, in 2004, it produced a diploid assembly constructed from 10.9&#215; coverage (Assembly 19), which provided single contigs where heterozygosity was not obvious and allelic contigs where there was significant heterozygosity <abbrgrp><abbr bid="B6">6</abbr></abbrgrp>. There were several important steps along the way to this release; these are detailed in a review by Nantel <abbrgrp><abbr bid="B7">7</abbr></abbrgrp>. The first was the construction of a physical map of one chromosome, chromosome 7 <abbrgrp><abbr bid="B8">8</abbr></abbrgrp>. Next were the two early releases of the emerging sequence data, called Assembly 4 and Assembly 6. These lower density assemblies facilitated a great deal of gene analysis, including the construction of several microarrays <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B10">10</abbr></abbrgrp>, an analysis of haploinsufficient genes for filamentation <abbrgrp><abbr bid="B11">11</abbr></abbrgrp>, and the elucidation of several gene families, including a number important in pathogenesis. Examples include the secreted aspartyl proteinases (<it>SAP</it>s) <abbrgrp><abbr bid="B12">12</abbr></abbrgrp>, the agglutinin-like substances (<it>ALS</it>s) <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>, and the phospholipases (<it>PLB </it>and <it>PLC</it>) <abbrgrp><abbr bid="B14">14</abbr><abbr bid="B15">15</abbr></abbrgrp>. Two quite comprehensive disruption libraries are currently available. One library was constructed systematically by targeted disruption of one allele followed by insertion of a regulated promoter at the other allele <abbrgrp><abbr bid="B16">16</abbr></abbrgrp>. The other disruption library was constructed randomly by transposon mutagenesis, using as an insert into one allele the UAU cassette, which facilitates disruption of the second allele via two spontaneously occurring steps of mitotic recombination <abbrgrp><abbr bid="B17">17</abbr></abbrgrp>.</p>
         <p>These tools have greatly advanced the pace of molecular analysis of the pathogenesis and life style of <it>C. albicans</it>, but Assembly 19 was not a finished sequence, since it contained a total of 412 contigs, of which 266 were the haploid set. In order to provide a finished sequence, we used hybridization of chromosomes partially purified by pulse-field electrophoresis as well as a sequence tagged site (STS) map based on a fosmid library to identify the chromosomal location of various contigs. We then utilized bioinformatics to analyze both the emerging sequence of <it>C. albicans </it>strain WO-1, the sister species <it>Candida dubliniensis </it>and the primary traces used to generate Assembly 4, and coupled this with the STS map and a whole-chromosome optical map to construct Assembly 21. This assembly has eight linear DNA sequences including nine copies of the intermediate repeat called the major repeat sequence (MRS), of which three have been completely sequenced. The MRS is made up of three subrepeats, called RB2, RPS, and HOK <abbrgrp><abbr bid="B18">18</abbr></abbrgrp>. In addition to the intact MRS sequences, there are 14 RB2 sequences and 2 HOK sequences. The ribosomal DNA constitutes another repeat, which is not included in the assembly. In addition to its usefulness for gene mapping, Assembly 21 reveals some interesting biological features, including a putative transcription factor gene family with members proximal to 14 of the 16 telomeres, a telomere-like sequence in the middle of chromosome 1, information on the relationships of chromosome location to similarity of gene families, and a revised open reading frame (ORF) list.</p>
      </sec>
      <sec>
         <st>
            <p>Results</p>
         </st>
         <sec>
            <st>
               <p>Assembly 21</p>
            </st>
            <p>The completed Assembly 21 contains 15.845 Mb of DNA, organized into the 8 <it>C. albicans </it>chromosomes. The assembly does not include the complete telomeric sequences for every chromosome, and includes only one copy of the normally repeated rDNA on chromosome R. All but two chromosomes end with the subtelomeric repeat CARE-2 (Rel-2) <abbrgrp><abbr bid="B19">19</abbr><abbr bid="B20">20</abbr></abbrgrp> at each telomere. This repeat was originally shown to be telomere-associated on chromosome 7 <abbrgrp><abbr bid="B8">8</abbr></abbrgrp>. Macro-restriction maps locate MRSs on all chromosomes but chromosome 3, but since the MRSs are highly repeated and more than 16 kb in size, they are represented but not included in the assembly. On chromosomes 7 and 6, where the MRSs have been sequenced, they are inserted into the sequence. The finished sequence of chromosome 7 has been published elsewhere <abbrgrp><abbr bid="B21">21</abbr></abbrgrp>; this assembly includes a slightly revised version. In addition to the missing MRS regions there is one gap, on chromosome 3. Assembly 21 is a haploid assembly; in cases where Assembly 19 detected heterozygosity, allelic contigs 19-1XXXX and 19-2XXXX were assembled. In regions where there were allelic contigs in Assembly 19, only the 19-1XXXX contigs were used to construct Assembly 21, so it provides no information about heterozygosity.</p>
         </sec>
         <sec>
            <st>
               <p>Chromosome size and structure</p>
            </st>
            <p>Table <tblr tid="T1">1</tblr> shows the sizes of the individual chromosomes in Assembly 21 and compares the size of the sequence with the size of each chromosome as determined by the optical map. The Assembly 21 size in Table <tblr tid="T1">1</tblr> does not include the MRS if one or more are present, and for chromosome R, only one copy of the rDNA repeat is included. The chromosomes range in size from 3,190,598 bases for chromosome 1 to 943,480 bases for chromosome 7. For chromosome R, the actual number of rDNA repeats adds 350 kb to one homologue and 800 kb to the other. Where chromosome homologues are of different sizes, as with chromosome R, the optical map software will choose one homologue. The optical map thus gives the size, including the rDNA, of the smaller homologue of chromosome R. The larger homologue is very close to chromosome 1 in size, about 3.1 Mb. The fact that the subtelomeric repeat CARE-2 is missing from one telomere may indicate that the sequence does not extend to the end of the chromosome. On chromosomes 2 and 7 both the <it>TLO </it>gene and the CARE-2 sequences are missing. We were able to map 233,091 out of 250,884 (93%) of the original sequence traces on Assembly 21. The remaining 17,793 sequences probably represent some of these missing sequences as well as allelic variants but it was impossible to map them to the current assembly.</p>
            <tbl id="T1">
               <title>
                  <p>Table 1</p>
               </title>
               <caption>
                  <p>Chromosome size and features</p>
               </caption>
               <tblbdy cols="6">
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c cspan="2" ca="center">
                        <p>Size (bp)</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c cspan="2">
                        <hr/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Chromosome</p>
                     </c>
                     <c ca="center">
                        <p>Assembly 21</p>
                     </c>
                     <c ca="center">
                        <p>Optical map*</p>
                     </c>
                     <c ca="center">
                        <p>Centromere location</p>
                     </c>
                     <c ca="center">
                        <p>MRS</p>
                     </c>
                     <c ca="left">
                        <p>Features</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="6">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>R</p>
                     </c>
                     <c ca="center">
                        <p>2,294,279</p>
                     </c>
                     <c ca="center">
                        <p>2,709,974</p>
                     </c>
                     <c ca="center">
                        <p>1,748,965</p>
                     </c>
                     <c ca="center">
                        <p>1</p>
                     </c>
                     <c ca="left">
                        <p>Ribosomal DNA cluster is ~800 kb on one homologue, ~350 kb on the other. Lacks CARE-2 on the right telomere</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>1</p>
                     </c>
                     <c ca="center">
                        <p>3,190,598</p>
                     </c>
                     <c ca="center">
                        <p>3,218,448</p>
                     </c>
                     <c ca="center">
                        <p>1,561,879</p>
                     </c>
                     <c ca="center">
                        <p>1</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>2</p>
                     </c>
                     <c ca="center">
                        <p>2,233,511</p>
                     </c>
                     <c ca="center">
                        <p>2,228,646</p>
                     </c>
                     <c ca="center">
                        <p>1,924,678</p>
                     </c>
                     <c ca="center">
                        <p>1</p>
                     </c>
                     <c ca="left">
                        <p>The left-hand telomere lacks <it>TLO </it>and CARE-2</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>3</p>
                     </c>
                     <c ca="center">
                        <p>1,798,342</p>
                     </c>
                     <c ca="center">
                        <p>1,794,194</p>
                     </c>
                     <c ca="center">
                        <p>816,770</p>
                     </c>
                     <c ca="center">
                        <p>0</p>
                     </c>
                     <c ca="left">
                        <p>One gap</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>4</p>
                     </c>
                     <c ca="center">
                        <p>1,622,838</p>
                     </c>
                     <c ca="center">
                        <p>1,619,262</p>
                     </c>
                     <c ca="center">
                        <p>1,000,800</p>
                     </c>
                     <c ca="center">
                        <p>2</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>5</p>
                     </c>
                     <c ca="center">
                        <p>1,191,532</p>
                     </c>
                     <c ca="center">
                        <p>1,246,010</p>
                     </c>
                     <c ca="center">
                        <p>465,800</p>
                     </c>
                     <c ca="center">
                        <p>1</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>6</p>
                     </c>
                     <c ca="center">
                        <p>1,030,364</p>
                     </c>
                     <c ca="center">
                        <p>1,057,155</p>
                     </c>
                     <c ca="center">
                        <p>975,879</p>
                     </c>
                     <c ca="center">
                        <p>1</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>7</p>
                     </c>
                     <c ca="center">
                        <p>943,480</p>
                     </c>
                     <c ca="center">
                        <p>961,199</p>
                     </c>
                     <c ca="center">
                        <p>423,765</p>
                     </c>
                     <c ca="center">
                        <p>2</p>
                     </c>
                     <c ca="left">
                        <p>The right-hand telomere lacks <it>TLO </it>and CARE-2</p>
                     </c>
                  </r>
               </tblbdy>
               <tblfn>
                  <p>*The MRS is included in all the optical map sizes.</p>
               </tblfn>
            </tbl>
            <p>Assembly 21 demonstrates that the sub-telomeric repeats found on chromosome 7 <abbrgrp><abbr bid="B21">21</abbr></abbrgrp> are characteristic of all the chromosomes. The common factor is all or part of the repeat CARE-2 <abbrgrp><abbr bid="B19">19</abbr></abbrgrp>, which includes the long terminal repeat (LTR) kappa (AF041469) and shares some sequence with Rel-2 <abbrgrp><abbr bid="B20">20</abbr></abbrgrp>. There are several other LTRs at the telomeres, and on chromosome 1R there is an intact transposon pCa1 (AF007776). Although one telomere on each of chromosomes 2 and 7 in Assembly 21 is missing CARE-2 (and the <it>TLO </it>gene (see below)), the fosmid map shows these repeats on every telomere. Sequence near the telomere is hard to clone, and the most likely interpretation of this discrepancy is that the appropriate clones for these regions were underrepresented in the Stanford library. Since the repeats tend to be telomere-proximal to the <it>TLO </it>gene, the fact that <it>TLO </it>is missing on chromosomes 2 and 7 also suggests that some sequence is missing. The detailed organization of the sub-telomeric repeats is complex and differs at each telomere.</p>
         </sec>
         <sec>
            <st>
               <p>Centromeres</p>
            </st>
            <p>Sequences that bind the Cse4p protein (the CENP-A orthologue) from <it>C. albicans </it>have been identified on each chromosome by Sanyal <it>et al</it>. <abbrgrp><abbr bid="B22">22</abbr></abbrgrp>, who suggested that they are at least part of the centromere. This interpretation is supported by the observation that the sequence identified on chromosome 7 contributes to its mitotic stability. Table <tblr tid="T1">1</tblr> gives the address (distance in bases from the left-hand end of the chromosome) of each centromere and shows that the chromosomes are generally metacentric, with the centromere sequences located near the center of the chromosomes. However, chromosomes 2 and 6 are acrocentric. On chromosome 2 the centromere is about 85% of the way toward the right telomere, and on 6 it is more than 95%. In contrast to <it>Saccharomyces cerevisiae </it>and <it>Candida glabrata</it>, there are no sub-telomeric gene families other than the <it>TLO </it>genes in <it>C. albicans</it>.</p>
         </sec>
         <sec>
            <st>
               <p>ORF analysis</p>
            </st>
            <p>The ORF analysis of Assembly 21 is being carried out at the Candida Genome Database (47). The present analysis, based on the previous assembly, 20, differs slightly from that of Assembly 19. The human-curated annotation of Assembly 19 identified 6,354 genes. The Candida Genome Database contains 12,015 genes, including allelic variants. Assembly 20 contains 6,090 genes. Of these, the identity and sequence of 6,065 have not changed significantly from Assembly 19 (>98% sequence identity). Fourteen new ORFs have been added to Assembly 20. Thus, 290 genes from the annotation of Assembly 19 are not in Assembly 20. Of these, 192 have been found to be identical to other Assembly 20 genes and have >90% of their sequence incorporated in these genes, while an additional 19 have >50% of their sequence incorporated into another ORF. The 79 remaining Assembly 19 ORFs have no strong Blast hits and include 55 hypotheticals, 15 gene family members, 7 that were truncated, 1 that was overlapping, and 2 putative sequencing errors. The Candida Genome Database provides an extensive analysis of the changes in ORF classifications between the two assemblies. Some discrepancies between their numbers and ours arise from the fact that they started from a greater number of orf19 genes.</p>
         </sec>
         <sec>
            <st>
               <p>Repeated DNA</p>
            </st>
            <p><it>C. albicans </it>was shown to have eight chromosomes by pulse-field electrophoresis. Early studies demonstrated not only that the organism was diploid but that it contained several large blocks of repeated DNA, the MRS, with a complete or partial copy on each of the eight chromosomes <abbrgrp><abbr bid="B18">18</abbr><abbr bid="B23">23</abbr><abbr bid="B24">24</abbr><abbr bid="B25">25</abbr></abbrgrp>.</p>
            <p>Analysis of the emerging sequence demonstrated the existence of a large number of LTRs and other repeated sequences related to transposons <abbrgrp><abbr bid="B26">26</abbr></abbrgrp>. Gene families also constitute a significant source of repeated DNA. Some of these repeated sequences, especially the MRSs, are too large to be crossed in a single sequencing run, so that assembly using bioinformatics is blocked. The sequences of the MRSs on chromosomes should be considered unreliable due to the repeated nature of MRSs, which attract traces from many different other MRSs in the genome. The smaller repeated DNA regions are subject to the same potential problem and led to erroneous assembly in some of the Assembly 19 contigs. The fosmid map and the optical map were very helpful in identifying these errors and correcting them.</p>
         </sec>
         <sec>
            <st>
               <p>The <it>TLO </it>gene family</p>
            </st>
            <p>The putative transcription factor gene <it>CTA2 </it>was identified by Kaiser <it>et al</it>. <abbrgrp><abbr bid="B27">27</abbr></abbrgrp> in a one-hybrid screen in <it>S. cerevisiae</it>. Goodwin and Poulter <abbrgrp><abbr bid="B26">26</abbr></abbrgrp> first noticed that this or a related sequence was often telomere-associated. The <it>CTA2 </it>sequence has homology to members of a gene family found at 14 of the 16 telomeres. Since <it>CTA2 </it>is a gene name, we renamed the family <it>TLO </it>for TeLOmere-associated genes. On this assembly, they are numbered so that the left arm of the chromosome has an odd-numbered <it>TLO </it>gene and the right <it>TLO </it>gene has an even number, ascending from chromosome R through chromosome 7. Thus, chromosome R has <it>TLO1 </it>on the left and <it>TLO2 </it>on the right. Although the original member of this gene family was isolated because it has transactivating activity in <it>S. cerevisiae</it>, the function of these genes in <it>C. albicans </it>has not been determined. There are 15 members of this gene family in Assembly 21; 14 are located within 14 kb of an end of a chromosome contig, and all are oriented in a 5'-3' direction toward the centromere and away from the telomere (Figure <figr fid="F1">1</figr>). In addition, all but one (chromosome 1R) have the LTR kappa 5' to the ORF, usually immediately adjacent but sometimes a few kb away. One member of the <it>TLO </it>gene family (ORF19.2661) is found in the interior of chromosome 1, 1.29 Mb from the left end of this 3.19 Mb chromosome. Like the other family members, this ORF points toward the centromere and away from the telomere and has a kappa sequence from the subtelomeric repeat CARE-2 in its 5' region. In this case, the kappa sequence is the only part of the CARE-2 sequence present. Thus, this particular copy resembles all the telomere-associated family members but is located in the middle of chromosome 1, and we have numbered it <it>TLO34</it>, since it is located between <it>TLO3 </it>and <it>TLO4</it>. In order to keep the numbering system consistent, we have saved the names <it>TLO6 </it>and <it>TLO15 </it>for the genes on 2R and 7L that we expect will eventually be identified. In addition, Figure <figr fid="F2">2a</figr> demonstrates that <it>TLO14</it>, on the right arm of chromosome 6, has been confirmed by PCR but this gene has yet to be sequenced.</p>
            <fig id="F1">
               <title>
                  <p>Figure 1</p>
               </title>
               <caption>
                  <p>Schematic representation of the chromosomes in Assembly 21</p>
               </caption>
               <text>
                  <p>Schematic representation of the chromosomes in Assembly 21. The major structural features, including the centromeres (CEN), the MRS sequences, and the CARE-2 sequences are represented. The <it>TLO </it>genes without introns are shown in red; those with introns are in green. The positions are approximately to scale. The smaller homologue of chromosome R (containing a 350 kb rDNA repeat) is shown.</p>
               </text>
               <graphic file="gb-2007-8-4-r52-1"/>
            </fig>
            <fig id="F2">
               <title>
                  <p>Figure 2</p>
               </title>
               <caption>
                  <p>Analysis of the <it>TLO </it>gene family</p>
               </caption>
               <text>
                  <p>Analysis of the <it>TLO </it>gene family. <b>(a) </b>PCR validation of the telomeric <it>TLO </it>genes using a universal <it>TLO </it>primer coupled with a second primer that is specific for each chromosome arm. A putative <it>TLO14 </it>gene was identified on the 6R arm (see arrowhead). <b>(b) </b>Identification, by rtPCR, of two putative splicing variants using a 5' oligo that will recognize all <it>TLO </it>genes and two 3' primers with partial specificity for the two possible versions of the <it>TLO8 </it>transcripts. <b>(c) </b>Alignment of the amino acid sequences of the 14 <it>TLO </it>genes that are present in the latest genome assembly.</p>
               </text>
               <graphic file="gb-2007-8-4-r52-2"/>
            </fig>
            <p>An alignment of the amino acid sequences encoded by 13 <it>TLO </it>genes is shown in Figure <figr fid="F2">2c</figr>. While we initially noticed that six of them (<it>TLO5</it>, <it>7</it>, <it>8</it>, <it>11</it>, <it>13</it>, and <it>16</it>) had a different carboxy-terminal domain, an astute reviewer remarked that adding an intron to the annotations of these genes would allow them to code for a protein with a carboxy-terminal end that is similar to the other <it>TLO </it>genes. We used rtPCR with a universal 5' primer and gene-specific 3' primers to show that both types of transcripts are expressed in <it>Candida </it>cells. Although the amplicons contained a mixture of two or more alleles or family members, end sequencing clearly showed that the long form contained the common carboxy-terminal sequences while the short form encoded the unique sequences first noticed in <it>TLO5</it>. Although the annotation assumes that all six members of this <it>TLO </it>family sub-group yield transcripts in both spliced and unspliced forms, confirmation would require screening of an expressed sequence tag library. Finally, <it>TLO4 </it>(orf19.7276.1) lies almost at the edge of a sequence contig and is obviously missing an upstream exon while the additional amino-terminal domain encoded by <it>TLO34 </it>may or may not be part of the actual transcript.</p>
         </sec>
         <sec>
            <st>
               <p>Other gene families</p>
            </st>
            <p>As noted by Braun and coworkers <abbrgrp><abbr bid="B28">28</abbr></abbrgrp>, <it>C. albicans </it>has a number of gene families. These range in size from 2 members to as many as 26 (the ABC transporter superfamily) <abbrgrp><abbr bid="B29">29</abbr></abbrgrp>. Several are clustered on one or two chromosomes. For example, chromosome 6 contains members of six families, with five represented by more than one member, and one, <it>ALS</it>, with five members. Most of the families with more than two members have representatives on more than one chromosome. In addition, some two-member families are on different chromosomes. Examination of the genes surrounding family members on different chromosomes for the most part gives no hint as to the mechanism by which homologous sequences arrived at different locations.</p>
            <p>The arrangement of these families on the chromosomes is not random. Figure <figr fid="F3">3a</figr> shows the ORFs from Assembly 21 aligned along the eight chromosomes. ORFs with 80% or more similarity are connected by lines. There are some areas that appear to be tandem duplications. An example from chromosome 3 is shown in Figure <figr fid="F3">3b</figr>. The sequence containing the gene <it>DTD2 </it>and those for two hypothetical proteins seem to have undergone a duplication accompanied by an inversion. A similar event seems to have occurred 40 kb away involving the genes <it>DAL5</it>, <it>ECM18</it>, and <it>FUR4</it>. There are no sequences such as LTRs near these duplications. Although the most plausible mechanism for the generation of gene families whose members are close together on one chromosome is tandem duplication, in many cases of gene families that contain tandemly repeated genes, the most closely related members are dispersed. Table <tblr tid="T2">2</tblr> shows this for the <it>SAP </it>(<ul>S</ul>ecreted <ul>A</ul>spartyl <ul>P</ul>roteinase) and <it>LIP </it>(<ul>LIP</ul>ase) gene families. For the <it>SAP </it>gene family, only <it>SAP5 </it>and <it>SAP6</it>, 84 kb apart on chromosome 6, are both nearest neighbors and most closely related in sequence. <it>SAP1 </it>and <it>SAP4 </it>are adjacent and each is most closely related to another member. For the <it>LIP </it>gene family, <it>LIP5 </it>is most similar to <it>LIP9</it>, and they are 12 kb apart on chromosome 7. Family members <it>LIP1</it>, <it>6</it>, and <it>10 </it>are adjacent on chromosome 1 and each is most homologous to a distal gene (<it>LIP3 </it>for <it>LIP1 </it>and <it>LIP2 </it>for <it>LIP6 </it>and <it>LIP10</it>). 
It is interesting to note from Figure <figr fid="F3">3a</figr> that the number of highly similar ORFs outside the <it>TLO </it>and MRS sequences (represented by the blue connecting lines) is relatively small compared to the number of gene families.</p>
            <fig id="F3">
               <title>
                  <p>Figure 3</p>
               </title>
               <caption>
                  <p>Highly related ORFs in the <it>C. albicans </it>genome</p>
               </caption>
               <text>
                  <p>Highly related ORFs in the <it>C. albicans </it>genome. <b>(a) </b>The eight chromosomes are shown with the ORFs indicated by the small black lines. The red lines connect the <it>TLO </it>genes, while the green lines connect the ORFs in the MRS sequence. The blue lines connect ORFs with a relatedness greater than 80%. <b>(b) </b>Close-up of chromosome 3 from base 622,329 to base 785,321 showing two areas that appear to have undergone duplication followed by inversion.</p>
               </text>
               <graphic file="gb-2007-8-4-r52-3"/>
            </fig>
            <tbl id="T2">
               <title>
                  <p>Table 2</p>
               </title>
               <caption>
                  <p>Chromosome location and similarity of the SAP and LIP gene families</p>
               </caption>
               <tblbdy cols="5">
                  <r>
                     <c ca="left">
                        <p>Gene</p>
                     </c>
                     <c ca="left">
                        <p>Chromosome (nearest neighbor)</p>
                     </c>
                     <c ca="center">
                        <p>BLAST score for nearest neighbor</p>
                     </c>
                     <c ca="left">
                        <p>Most closely related member (BLAST)</p>
                     </c>
                     <c ca="center">
                        <p>BLAST score for most closely related member</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="5">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b><it>SAP </it>gene family</b>
                        </p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>SAP1</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>6 (<it>SAP4</it>)</p>
                     </c>
                     <c ca="center">
                        <p>2,453</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>SAP2</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3,097</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>SAP2</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>R (none)</p>
                     </c>
                     <c ca="center">
                        <p>NA</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>SAP3</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3,529</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>SAP3</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>3 (<it>SAP9</it>)</p>
                     </c>
                     <c ca="center">
                        <p>326</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>SAP2</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3,529</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>SAP4</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>6 (<it>SAP1</it>)</p>
                     </c>
                     <c ca="center">
                        <p>2,453</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>SAP6</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>5,639</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>SAP5</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>6 (<it>SAP6</it>)</p>
                     </c>
                     <c ca="center">
                        <p>5,100</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>SAP6</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>5,100</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>SAP6</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>6 (<it>SAP5</it>)</p>
                     </c>
                     <c ca="center">
                        <p>5,121</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>SAP4</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>5,639</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>SAP7</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>1 (none)</p>
                     </c>
                     <c ca="center">
                        <p>NA</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>SAP6</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>637</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>SAP8</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>3 (<it>SAP9</it>)</p>
                     </c>
                     <c ca="center">
                        <p>194</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>SAP3</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1,628</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>SAP9</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>3 (<it>SAP8</it>)</p>
                     </c>
                     <c ca="center">
                        <p>194</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>SAP10</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>296</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>SAP10</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>4 (none)</p>
                     </c>
                     <c ca="center">
                        <p>NA</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>SAP3</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>299</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b><it>LIP </it>gene family</b>
                        </p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>LIP1</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>1 (<it>LIP10</it>)</p>
                     </c>
                     <c ca="center">
                        <p>2,523</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>LIP3</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3,976</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>LIP2</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>1 (<it>LIP1</it>)</p>
                     </c>
                     <c ca="center">
                        <p>2,773</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>LIP6</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3,320</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>LIP3</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>1 (<it>LIP6</it>)</p>
                     </c>
                     <c ca="center">
                        <p>2,956</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>LIP1</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3,976</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>LIP4</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>6 (none)</p>
                     </c>
                     <c ca="center">
                        <p>NA</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>LIP8</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>4,535</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>LIP5</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>7 (<it>LIP9</it>)</p>
                     </c>
                     <c ca="center">
                        <p>4,034</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>LIP8</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>5,318</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>LIP6</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>1 (<it>LIP10</it>)</p>
                     </c>
                     <c ca="center">
                        <p>2,935</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>LIP2</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3,320</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>LIP7</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>R (none)</p>
                     </c>
                     <c ca="center">
                        <p>NA</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>LIP1</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>838</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>LIP8</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>7 (<it>LIP9</it>)</p>
                     </c>
                     <c ca="center">
                        <p>2,076</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>LIP5</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>5,174</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>LIP9</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>7 (<it>LIP5</it>)</p>
                     </c>
                     <c ca="center">
                        <p>4,084</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>LIP5</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>4,084</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>LIP10</it>
                        </p>
                     </c>
                     <c ca="left">
                        <p>1 (<it>LIP6</it>)</p>
                     </c>
                     <c ca="center">
                        <p>2,935</p>
                     </c>
                     <c ca="left">
                        <p>
                           <it>LIP2</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3,211</p>
                     </c>
                  </r>
               </tblbdy>
            </tbl>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Discussion</p>
         </st>
         <p>Assembly 19, the diploid assembly of the genome of <it>C. albicans </it>strain SC5314, was a very important achievement. It provided a great deal of insight into many aspects of genomic organization, especially the large amount of heterozygosity <abbrgrp><abbr bid="B6">6</abbr></abbrgrp>. The subsequent annotation of the assembly by the community demonstrated a number of important properties of the genome, including the number of genes (6,354), the number with introns (224), the frequency and characteristics of short tandem repeats, and the characteristics of several multigene families. Braun <it>et al</it>. <abbrgrp><abbr bid="B28">28</abbr></abbrgrp> also identified putative spurious genes and genes either on overlapping contigs or truncated by the end of contigs. However, they did not address chromosome location nor try to join the 266 haploid contigs of Assembly 19 into chromosome-sized assemblies. Thus, although these two projects brought the <it>C. albicans </it>genome to a very useful state, they still left it incomplete, lacking chromosome-size contigs and with some genes in an ambiguous state. Subsequently, Chibana <it>et al</it>. <abbrgrp><abbr bid="B21">21</abbr></abbrgrp> completed the sequence of chromosome 7, identified 404 genes, and compared the synteny to the <it>S. cerevisiae </it>genome. They sequenced the MRSs and the gaps left in Assembly 19. They then aligned the sequence on the chromosome as determined by the physical map <abbrgrp><abbr bid="B8">8</abbr></abbrgrp>.</p>
         <p>We undertook to complete the assembly (on a haploid basis) of all the chromosomes of <it>C. albicans</it>. We ordered and aligned the existing contigs along the chromosomes, filled in the gaps either by reexamining the traces at the Stanford Genome Technology Center, by gap sequencing or by using the emerging <it>C. albicans </it>WO-1 sequence to correct two regions of chromosomes 1 and 4. The assembly was also based on the STS fosmid map and on an optical map. As completed, the assembly consists of 16 supercontigs, interrupted on 5 chromosomes only by large blocks of repeated DNA. The contigs for chromosomes 6 and 7, for which the MRSs have been sequenced, have no gaps, while chromosome 3 has one gap, where adjacent contigs could not be joined. Chromosome 4 has two gaps and 5, 2, and 1 have one gap each, corresponding to the MRS. Chromosome R has two gaps, one for the MRS and one for the rDNA. Thus, there is only one gap in the unique sequence of the <it>C. albicans </it>genome that cannot be filled with sequence data from either SC5314 or WO-1. We identified 85 junctions that could not be filled with the original SC5314 sequence traces. We successfully amplified 82 of these (see Additional data file 1) and work is continuing to amplify the last three junctions and to produce 'SC5314-pure' sequence. We have used this assembly to determine the size of the various chromosomes and to examine several unique aspects of the genome, including the subtelomeric regions, the gene families, and evidence for chromosome rearrangements. Our ultimate objectives are to identify aspects of the genome that affect virulence and to increase our understanding of the evolutionary mechanisms that affect the genome of this fungal pathogen.</p>
         <p>There are chromosome size discrepancies between Assembly 21 and the optical map; these are attributable to several causes. Where the Assembly 21 size is smaller than the optical map size, the explanation may be the missing MRS, missing telomere-associated sequences, or size heterozygosity between the homologues. For example, we know that on chromosome 5 the MRS is 50 kb in size <abbrgrp><abbr bid="B4">4</abbr></abbrgrp>, very close to the difference between the two estimates. Where the size determined by the optical map is smaller (chromosomes 2, 3, and 4), the difference seems most likely to be heterozygosity for insertions of retrotransposon-related sequences. In these cases, the optical map of this chromosome is probably derived from the smaller homologue. For chromosome 2, the discrepancy is rather large, given that this chromosome in Assembly 21 lacks the MRS and probably some telomere sequences. Interestingly, the size estimates in Jones <it>et al</it>. <abbrgrp><abbr bid="B6">6</abbr></abbrgrp> for the various chromosomes are remarkably close to the sizes determined by the optical map in Table <tblr tid="T1">1</tblr>.</p>
         <p>One piece of information that comes out of our assembly is the similarity of the sequence of the <it>C. dubliniensis </it>genome to that of <it>C. albicans</it>. Although the karyotypes of these two organisms are quite divergent, the arrangement of genes within the chromosomes is similar enough to be of great assistance in mapping the contigs from Assembly 19. This bears out the evidence from the presence of MRS-like sequences and the ability to produce interspecies hybrids <abbrgrp><abbr bid="B30">30</abbr></abbrgrp> that these two species are very closely related indeed. Other studies have shown that only about 4.4% (247) of <it>C. albicans </it>genes have less than 60% homology to <it>C. dubliniensis </it><abbrgrp><abbr bid="B31">31</abbr></abbrgrp>. Our results suggest that intergenic regions also show regions of significant sequence conservation.</p>
         <p>The amount of repeated DNA in <it>C. albicans </it>is significant. The MRSs were a major problem and their placement on the chromosomes required the physical and optical maps. Chromosomes 4 and 7 each have two MRSs forming an inverted repeat, and in principle the internal DNA fragment could invert via mitotic recombination. In strain SC5314 and its derivative, CAI-4, this inversion seems to occur very rarely, at least in the laboratory. In spite of the fact that most of the known translocations in <it>C. albicans </it>occur at the MRS, suggesting that this is a hot spot for recombination, there is no evidence on either chromosome for a flip of the bracketed sequence.</p>
         <p>The specific sequences of six of the nine MRSs are unavailable. This is only a problem if sequence variation in the MRS plays a biological role, and there is no evidence that it does. In addition to the MRSs and the subtelomeric repeats, there are more than 350 LTR sequences belonging to 34 different families scattered throughout the genome <abbrgrp><abbr bid="B26">26</abbr></abbrgrp>, and several of these are found clustered at telomeres. The subtelomeric repeat CARE-2 contains an LTR called kappa <abbrgrp><abbr bid="B32">32</abbr></abbrgrp>, which is found at the 5' end of each member of the <it>TLO </it>gene family. Whether this is related to the expansion of this family to the telomeres is not clear. The repeated DNA led to misassembly of some contigs in Assembly 19, including chimeras, artifactual duplications, and omitted sequence. The two physical maps and the <it>C. dubliniensis </it>sequence were essential in sorting out these artifacts.</p>
         <p>The numerous gene families in <it>C. albicans </it>distinguish it from <it>S. cerevisiae</it>. A very common feature of these families is a clustering of members on a particular chromosome, which might reflect an ontology wherein a single copy undergoes tandem duplication and then sequences diverge as function diverges. There are several instances where similar but oppositely oriented gene clusters suggest that an inverted duplication of a region larger than a gene has occurred (Figure <figr fid="F3">3a</figr>).</p>
         <p>The model for gene family ontology of duplication followed by dispersion would predict that, in general, similarity should be related to proximity. The arrangements of the two families we examined in detail, the <it>SAP </it>family and the <it>LIP </it>family, raise some questions about this model. In only two cases are the most similar family members the closest neighbors (<it>SAP6 </it>and <it>SAP5</it>; <it>LIP9 </it>and <it>LIP5</it>). However, the members clustered on one chromosome tend to be most closely related. For the <it>LIP </it>gene family, Hube and coworkers <abbrgrp><abbr bid="B33">33</abbr></abbrgrp> showed that <it>LIP5</it>, <it>8</it>, and <it>9</it>, on chromosome 7, form a group and <it>LIP1</it>, <it>2</it>, <it>3</it>, <it>6</it>, and <it>10</it>, on chromosome 1, are a related but distinct group. <it>LIP7</it>, on chromosome R, is an outlier, only distantly related, while <it>LIP4</it>, on chromosome 6, fits with the chromosome 7 group. For the <it>SAP </it>gene family, <it>SAP4</it>, <it>5</it>, and <it>6 </it>(chromosome 6) form a highly related cluster, while the rest of the group, on chromosomes R, 3, 4, and 6, form a loose association, with the highest similarity being between <it>SAP2 </it>on chromosome R and <it>SAP1 </it>on chromosome 6. These relationships suggest that the families originate on one chromosome and expand there, and when one member is duplicated on another chromosome, the pattern may or may not be repeated. The large number of gene families whose members are dispersed but not randomly would suggest that <it>C. albicans </it>is efficient at gene duplication at a distance. However, there are no hints of a specific mechanism in the sequence, such as homology between flanking sequences on different chromosomes or traces of mobile genetic elements. 
The relatively small number of highly similar ORFs suggests that the gene family members either diverged some time ago or are under strong selection to perform specific functions.</p>
         <p>The <it>TLO </it>gene family is unique in <it>C. albicans </it>because it is found on every chromosome, and there are no closely adjacent members. This suggests that it arose by a mechanism different from, for example, the <it>LIP </it>family. One clue is that in all cases it is flanked on its 5' side by the LTR kappa <abbrgrp><abbr bid="B26">26</abbr></abbrgrp>. It seems possible that it has moved via genomic rearrangements caused by the transposon for which kappa is the LTR. An alternative possibility is that this family dispersed by telomere recombination, which is relatively frequent in <it>S. cerevisiae </it><abbrgrp><abbr bid="B34">34</abbr></abbrgrp> and has been shown to occur in <it>C. albicans </it><abbrgrp><abbr bid="B35">35</abbr></abbrgrp>. There are no obvious subtelomeric repeats in <it>C. albicans</it>, in contrast to <it>S. cerevisiae </it>and <it>C. glabrata </it><abbrgrp><abbr bid="B36">36</abbr></abbrgrp>.</p>
         <p>The two subgroups of the <it>TLO </it>family are differentiated by the presence of an intron. On chromosome 1, there is an interior <it>TLO </it>gene, as well as one near each telomere. A plausible explanation for this arrangement is that a chromosome translocation has occurred, with DNA being added to the end of a smaller precursor of chromosome 1, followed by reconstitution of the telomere at the new end generated. There are only three genes in the emerging <it>C. dubliniensis </it>sequence with similarity to the <it>TLO </it>family, and they are not located at the telomeres. On chromosomes 1 and R in <it>C. dubliniensis</it>, the genes adjacent to the <it>TLO </it>family member are present and are several kilobases from the end of the assembled sequence, suggesting that the <it>TLO </it>gene absence is not due to missing telomere-proximal sequence. Since there are significant differences in virulence between <it>C. albicans </it>and <it>C. dubliniensis</it>, there may be a role for the <it>TLO </it>gene family in some aspect of pathogenesis.</p>
         <p>The function of the <it>TLO </it>genes is unknown. Although a member of this gene family was isolated as a potential trans-activating protein (and named <it>CTA2</it>), based on a one-hybrid screen in <it>S. cerevisiae</it>, there is no evidence beyond those experiments as to function <abbrgrp><abbr bid="B27">27</abbr></abbrgrp>.</p>
         <p>Assembly 21 will be of major importance as studies of the biology and virulence of <it>C. albicans </it>continue. It will provide the mapping information that has been lacking due to the absence of a sexual cycle, and it should stimulate experiments in areas as different as evolution and genome dynamics. Among the unsolved questions in the latter area are the detailed structure of the centromere and the function of the MRS. The presence of chromosomal aberrations in clinical isolates was demonstrated early <abbrgrp><abbr bid="B37">37</abbr><abbr bid="B38">38</abbr></abbrgrp>, and several laboratory strains have recently been shown to be aneuploid <abbrgrp><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr><abbr bid="B5">5</abbr></abbrgrp>. Genome alterations have recently been shown to play an important role in drug resistance <abbrgrp><abbr bid="B5">5</abbr></abbrgrp>, and the complete sequence of each of the chromosomes may lead to the discovery of other changes that affect pathogenesis. Assembly 21 will also be useful for studying aneuploidy in <it>C. albicans</it>. Finally, this assembly provides an up-to-date listing of the genes of this important pathogen and will greatly aid its ongoing molecular analysis.</p>
      </sec>
      <sec>
         <st>
            <p>Materials and methods</p>
         </st>
         <sec>
            <st>
               <p>Assembly 21</p>
            </st>
            <p>The first step in Assembly 21 was to assign contigs from Assembly 19 to the appropriate chromosomes. One hundred contigs were anchored by probes on the physical map. To assign the rest, chromosomes were separated by pulsed-field gel electrophoresis. Chromosomal bands were eluted from the gel, labeled with either Cy3 or Cy5 dyes, and hybridized to a microarray <abbrgrp><abbr bid="B10">10</abbr></abbrgrp> based on ORFs identified from Assembly 4. As a control, total genomic DNA was also labeled with the reciprocal dye and co-incubated along with the partially purified chromosomes. At least two individual hybridizations (including dye swap controls) were conducted for each of the eight chromosomes. Results and details from these experiments can be seen on our web page <abbrgrp><abbr bid="B39">39</abbr></abbrgrp>. Fluorescence ratios were interpreted visually and it was very easy for us to assign chromosomal localization for 183 Assembly 19 contigs representing 98.4% of DNA sequences and 97.3% of known genes. This analysis also identified nine misassembled contigs by the fact that genes assigned to them hybridized to different chromosomes. The emerging sequence from the closely related species <it>C. dubliniensis </it>was then aligned, using megablast, to the Assembly 19 contigs. Phrap was used to connect pairs of contigs that were assigned to the same chromosome and were found to be adjacent based on the <it>C. dubliniensis </it>overlap <abbrgrp><abbr bid="B48">48</abbr></abbrgrp>. After the release of the <it>C. albicans </it>WO-1 traces, Phrap and these traces were also used to locate misassemblies and to correct them.</p>
            <p>The emerging alignments were compared with the physical map and areas of disparity mapped with more probes. Attempts to assemble the contigs into whole chromosomes with Phrap failed because of the large number of repeated elements in the <it>C. albicans </it>genome. A custom-made stitcher script (which utilized a 100 nucleotide pure text string starting from the previous alignment) was used to assemble the contig-alignments into chromosomes. The resulting assembly was checked for the presence of all the ORFs from the community annotation <abbrgrp><abbr bid="B28">28</abbr></abbrgrp>, and very few ORFs were missing. The penultimate assembly was compared with the optical genome map and the physical map and disparities resolved.</p>
            <p>One major discrepancy was the orientation of the sequence between the MRS regions on chromosome 4. The optical map and the physical map showed it in one conformation, while the assembly showed it in the other. The final orientation, consistent with the physical and optical maps, was confirmed by PCR of the border fragments of one MRS and by restriction digestion of genomic DNA and hybridization of a Southern blot with probes from the regions that flank the two MRSs on both sides.</p>
            <p>The remaining gaps were filled by PCR followed by sequencing of the products. One gap proved to be the result of a misassembly in a contig. The insertion of a partially overlapping contig closed it. Two gaps were filled by using the draft sequence of Broad Institute's WO-1 assembly <abbrgrp><abbr bid="B40">40</abbr></abbrgrp>. One gap was filled by a sequence from Selmecki <it>et al</it>. <abbrgrp><abbr bid="B5">5</abbr></abbrgrp>. The result was called Assembly 20 and it was composed of eight chromosomes with one unresolved (not connected) contig pair. Two gaps were introduced due to the flip of the area between two MRSs on chromosome 4. Two gaps are located around the rDNA region. In addition, most of the MRS regions have not been resolved and can be considered gaps, with the exception of the two MRSs in chromosome 7 and the one MRS in chromosome 6, which were cloned and sequenced at Chiba University.</p>
            <p>Following the completion of Assembly 20, we discovered that sequence traces that were used for the assembly and that we had thought were from strain SC5314 were in fact coming from the WO-1 sequencing project. Additional experiments and analysis were thus required to produce Assembly 21 in which the <it>C. albicans </it>WO-1 sequences from the contig junctions are replaced with sequences from SC5314. The original traces from SC5314 were obtained from the Stanford Genome Technology Centre and aligned over the junction areas of Assembly 20 using Sequencher 4.7 <abbrgrp><abbr bid="B41">41</abbr></abbrgrp>. Data that were contaminated with WO-1 sequences were tagged as 'Reference sequences', which means that they were not used to construct the consensus sequence for the new alignments. Any missing bases not covered by SC5314 traces were filled with Ns. The resulting alignments can be viewed on our web page <abbrgrp><abbr bid="B39">39</abbr></abbrgrp>. All but 85 junctions could be confidently filled with the SC5314 traces. PCRs were then performed on gaps, overlaps and rearranged regions that had low SC5314 trace coverage. As shown in Additional data file 1, a first round of 85 PCRs was successful in producing 80 amplicons of the expected size and efforts will continue to produce the missing SC5314 sequence data over the next few months. Additional data file 2 includes the sequences of the primers used for these PCR reactions.</p>
         </sec>
         <sec>
            <st>
               <p>The physical map</p>
            </st>
            <p>A fosmid library was constructed from strain 1161 <abbrgrp><abbr bid="B42">42</abbr></abbrgrp> by M Strathman. Sau3A-digested genomic DNA was inserted into the fosmid vector pFOS1 <abbrgrp><abbr bid="B43">43</abbr></abbrgrp>. The library consists of 3,840 clones with an average insert size of 40 kb (10&#215; genome coverage). For probing, the library was arrayed in ten 384-well plates. Two plates each were printed on a 12 &#215; 16 cm nylon membrane (Hybond-N+, Amersham Pharmacia Biotech Inc, Piscataway, NJ, USA), giving a complete library set on five membranes. For printing, freshly thawed fosmid clones were transferred, using a 384-point replicator, to the membrane overlaid on Luria agar containing 20 &#956;g chloramphenicol. The plates were grown overnight at 37&#176;C and the membranes were processed according to the manufacturer's instructions for colony lifts.</p>
            <p>Probes were variously clones of genomic DNA, T- and S-end probes from fosmids <abbrgrp><abbr bid="B44">44</abbr></abbrgrp>, and PCR products generated from SC5314 DNA using primers based on the public genomic sequences. Probes were randomly labeled with <sup>32</sup>P. The mapping was carried out as described in Chibana <it>et al</it>. <abbrgrp><abbr bid="B8">8</abbr></abbrgrp>. Briefly, membranes containing the library were hybridized with probes and fosmids hybridizing with the same probe were considered to overlap. Probes were also hybridized to Southern blots of pulsed-field separations of chromosomes and genomic <it>Sfi</it>I digests. The overlapping fosmids were arranged in a linear fashion with chromosome markers like the MRS used to orient the array. The complete set of overlapping fosmids was trimmed to make a minimum tiling set. The physical map is available <abbrgrp><abbr bid="B45">45</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>The optical map</p>
            </st>
            <p>The optical map was prepared by OpGen (Madison, WI, USA) using a proprietary technique. The technique involves preparation of large (>500 kb) molecules of genomic DNA, and spreading them on a microfluidic device that causes them to elongate and adhere to the substratum. They are then treated with a restriction enzyme (in this case, <it>Xho</it>I). Since the molecules are under slight tension, restriction sites appear as breaks in the molecules. The molecules are then stained with a fluorescent dye, scanned, and the positions of the restriction sites determined, using the amount of fluorescence to calculate the mass of the fragments. The results are averaged over 500 molecules, allowing the preparation of a macro-restriction map. This can be compared with the restriction map derived from the assembly. A more complete description of the technique and more information can be found at <abbrgrp><abbr bid="B46">46</abbr></abbrgrp>. Since the optical map software searches the image database for matches and then constructs the map based on the consensus pattern, the optical map of a chromosome describes only one of the two homologues, and no data on heterozygosity are included.</p>
            <p>Although in most cases the three methods were in agreement on the sequence assembly, in cases where disagreements could not be resolved, concurrence of two of the three approaches was deemed sufficient.</p>
            <p>The complete Assembly 21 has been submitted to the Candida Genome Database <abbrgrp><abbr bid="B47">47</abbr></abbrgrp>, which will be in charge of its maintenance and curation.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Additional data files</p>
         </st>
         <p>The following additional data are available with the online version of this paper. Additional data file <supplr sid="S1">1</supplr> is a figure showing a gel electrophoresis separation of the PCR products produced to close the gaps. Additional data file <supplr sid="S2">2</supplr> is a list of the oligonucleotides in the PCR reactions whose products are shown in Additional data file <supplr sid="S1">1</supplr>.</p>
         <suppl id="S1">
            <title>
               <p>Additional data file 1</p>
            </title>
            <caption>
               <p>Gel electrophoresis separation of the PCR products produced to close the gaps</p>
            </caption>
            <text>
               <p>Gel electrophoresis separation of the PCR products produced to close the gaps</p>
            </text>
            <file name="gb-2007-8-4-r52-S1.tiff">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S2">
            <title>
               <p>Additional data file 2</p>
            </title>
            <caption>
               <p>Oligonucleotides in the PCR reactions whose products are shown in Additional data file 1</p>
            </caption>
            <text>
               <p>Oligonucleotides in the PCR reactions whose products are shown in Additional data file 1</p>
            </text>
            <file name="gb-2007-8-4-r52-S2.xls">
               <p>Click here for file</p>
            </file>
         </suppl>
      </sec>
   </bdy>
   <bm>
      <ack>
         <sec>
            <st>
               <p>Acknowledgements</p>
            </st>
            <p>We thank Z Li, Dr Lois Hoyer, Christiane Cantin and members of the Candida Genome Database for technical assistance. Observations from one of the Genome Biology reviewers were extremely helpful in resolving the structure of the <it>TLO </it>transcripts. We also acknowledge Michael Goodpaster, Paul Wentzel, Anna Windfeldt, Molly Wang, and many past members of the Magee and Scherer labs for long efforts on the physical map. This project was funded by contract N01 AI05406 and grant R01 AI 16567 from the National Institute of Allergy and Infectious Diseases, USA (to PTM), by the NRC Genome Health Initiative (to MW and AN) as well as by Canadian Institutes of Health Research grants HOP 67260 (to AN) and MOP 42516 (to MW). This is NRC publication number 47525. This work was partly supported by a Grant-in-Aid for Scientific Research from the Ministry of Education, Culture, Sports, Science and Technology, Japan.</p>
         </sec>
      </ack>
      <refgrp>
         <bibl id="B1">
            <title>
               <p>Candidemia.</p>
            </title>
            <aug>
               <au>
                  <snm>Kullberg</snm>
                  <fnm>BJ</fnm>
               </au>
               <au>
                  <snm>Filler</snm>
                  <fnm>SG</fnm>
               </au>
            </aug>
            <source>Candida and Candidiasis</source>
            <publisher>Washington, DC: ASM Press</publisher>
            <editor>Calderone R</editor>
            <pubdate>2002</pubdate>
            <fpage>327</fpage>
            <lpage>340</lpage>
         </bibl>
         <bibl id="B2">
            <title>
               <p>Chromosome 1 trisomy compromises the virulence of <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Chen</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Magee</snm>
                  <fnm>BB</fnm>
               </au>
               <au>
                  <snm>Dawson</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Magee</snm>
                  <fnm>PT</fnm>
               </au>
               <au>
                  <snm>Kumamoto</snm>
                  <fnm>CA</fnm>
               </au>
            </aug>
            <source>Mol Microbiol</source>
            <pubdate>2004</pubdate>
            <volume>51</volume>
            <fpage>551</fpage>
            <lpage>65</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1046/j.1365-2958.2003.03852.x</pubid>
                  <pubid idtype="pmpid" link="fulltext">14756793</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B3">
            <title>
               <p>Comparative genome hybridization reveals widespread aneuploidy in <it>Candida albicans </it>laboratory strains.</p>
            </title>
            <aug>
               <au>
                  <snm>Selmecki</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Bergmann</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Berman</snm>
                  <fnm>J</fnm>
               </au>
            </aug>
            <source>Mol Microbiol</source>
            <pubdate>2005</pubdate>
            <volume>55</volume>
            <fpage>1553</fpage>
            <lpage>1565</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1111/j.1365-2958.2005.04492.x</pubid>
                  <pubid idtype="pmpid" link="fulltext">15720560</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B4">
            <title>
               <p>Effect of the major repeat sequence on chromosome loss in <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Lephart</snm>
                  <fnm>PR</fnm>
               </au>
               <au>
                  <snm>Chibana</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Magee</snm>
                  <fnm>PT</fnm>
               </au>
            </aug>
            <source>Eukaryot Cell</source>
            <pubdate>2005</pubdate>
            <volume>4</volume>
            <fpage>733</fpage>
            <lpage>741</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1087809</pubid>
                  <pubid idtype="pmpid" link="fulltext">15821133</pubid>
                  <pubid idtype="doi">10.1128/EC.4.4.733-741.2005</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B5">
            <title>
               <p>Aneuploidy and isochromosome formation in drug-resistant <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Selmecki</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Forche</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Berman</snm>
                  <fnm>J</fnm>
               </au>
            </aug>
            <source>Science</source>
            <pubdate>2006</pubdate>
            <volume>313</volume>
            <fpage>367</fpage>
            <lpage>370</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1717021</pubid>
                  <pubid idtype="pmpid" link="fulltext">16857942</pubid>
                  <pubid idtype="doi">10.1126/science.1128242</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B6">
            <title>
               <p>The diploid genome sequence of <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Jones</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Federspiel</snm>
                  <fnm>NA</fnm>
               </au>
               <au>
                  <snm>Chibana</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Dungan</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Kalman</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Magee</snm>
                  <fnm>BB</fnm>
               </au>
               <au>
                  <snm>Newport</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Thorstenson</snm>
                  <fnm>YR</fnm>
               </au>
               <au>
                  <snm>Agabian</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Magee</snm>
                  <fnm>PT</fnm>
               </au>
               <etal/>
            </aug>
            <source>Proc Natl Acad Sci USA</source>
            <pubdate>2004</pubdate>
            <volume>101</volume>
            <fpage>7329</fpage>
            <lpage>7334</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">409918</pubid>
                  <pubid idtype="pmpid" link="fulltext">15123810</pubid>
                  <pubid idtype="doi">10.1073/pnas.0401648101</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B7">
            <title>
               <p>The long hard road to a completed <it>Candida albicans </it>genome.</p>
            </title>
            <aug>
               <au>
                  <snm>Nantel</snm>
                  <fnm>A</fnm>
               </au>
            </aug>
            <source>Fungal Genet Biol</source>
            <pubdate>2006</pubdate>
            <volume>43</volume>
            <fpage>311</fpage>
            <lpage>315</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.fgb.2006.01.002</pubid>
                  <pubid idtype="pmpid" link="fulltext">16517185</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B8">
            <title>
               <p>A physical map of Chromosome 7 of <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Chibana</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Magee</snm>
                  <fnm>BB</fnm>
               </au>
               <au>
                  <snm>Grindle</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Ran</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Scherer</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Magee</snm>
                  <fnm>PT</fnm>
               </au>
            </aug>
            <source>Genetics</source>
            <pubdate>1998</pubdate>
            <volume>149</volume>
            <fpage>1739</fpage>
            <lpage>1752</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1460290</pubid>
                  <pubid idtype="pmpid" link="fulltext">9691033</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B9">
            <title>
               <p>DNA array studies demonstrate convergent regulation of virulence factors by Cph1, Cph2, and Efg1 in <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Lane</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Birse</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Zhou</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Matson</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>H</fnm>
               </au>
            </aug>
            <source>J Biol Chem</source>
            <pubdate>2001</pubdate>
            <volume>276</volume>
            <fpage>48988</fpage>
            <lpage>48996</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1074/jbc.M104484200</pubid>
                  <pubid idtype="pmpid" link="fulltext">11595734</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B10">
            <title>
               <p>Transcription profiling of <it>Candida albicans </it>cells undergoing the yeast-to-hyphal transition.</p>
            </title>
            <aug>
               <au>
                  <snm>Nantel</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Dignard</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Bachewich</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Harcus</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Marcil</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Bouin</snm>
                  <fnm>AP</fnm>
               </au>
               <au>
                  <snm>Sensen</snm>
                  <fnm>CW</fnm>
               </au>
               <au>
                  <snm>Hogues</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>van het Hoog</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Gordon</snm>
                  <fnm>P</fnm>
               </au>
               <etal/>
            </aug>
            <source>Mol Biol Cell</source>
            <pubdate>2002</pubdate>
            <volume>13</volume>
            <fpage>3452</fpage>
            <lpage>3465</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">129958</pubid>
                  <pubid idtype="pmpid" link="fulltext">12388749</pubid>
                  <pubid idtype="doi">10.1091/mbc.E02-05-0272</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B11">
            <title>
               <p>Haploinsufficiency-based large-scale forward genetic analysis of filamentous growth in the diploid human fungal pathogen <it>C. albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Uhl</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Biery</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Craig</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Johnson</snm>
                  <fnm>AD</fnm>
               </au>
            </aug>
            <source>EMBO J</source>
            <pubdate>2003</pubdate>
            <volume>22</volume>
            <fpage>2668</fpage>
            <lpage>2678</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">156753</pubid>
                  <pubid idtype="pmpid" link="fulltext">12773383</pubid>
                  <pubid idtype="doi">10.1093/emboj/cdg256</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B12">
            <title>
               <p><it>Candida albicans </it>proteinases: resolving the mystery of a gene family.</p>
            </title>
            <aug>
               <au>
                  <snm>Hube</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Naglik</snm>
                  <fnm>J</fnm>
               </au>
            </aug>
            <source>Microbiology</source>
            <pubdate>2001</pubdate>
            <volume>147</volume>
            <fpage>1997</fpage>
            <lpage>2005</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">11495978</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B13">
            <title>
               <p>The ALS5 gene of <it>Candida albicans </it>and analysis of the Als5p N-terminal domain.</p>
            </title>
            <aug>
               <au>
                  <snm>Hoyer</snm>
                  <fnm>LL</fnm>
               </au>
               <au>
                  <snm>Hecht</snm>
                  <fnm>JE</fnm>
               </au>
            </aug>
            <source>Yeast</source>
            <pubdate>2001</pubdate>
            <volume>18</volume>
            <fpage>49</fpage>
            <lpage>60</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/1097-0061(200101)18:1&lt;49::AID-YEA646&gt;3.0.CO;2-M</pubid>
                  <pubid idtype="pmpid" link="fulltext">11124701</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B14">
            <title>
               <p>Functional analysis of the phospholipase C gene CaPLC1 and two unusual phospholipase C genes, CaPLC2 and CaPLC3, of <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Kunze</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Melzer</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Bennett</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Sanglard</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>MacCallum</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Norskau</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Coleman</snm>
                  <fnm>DC</fnm>
               </au>
               <au>
                  <snm>Odds</snm>
                  <fnm>FC</fnm>
               </au>
               <au>
                  <snm>Schafer</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Hube</snm>
                  <fnm>B</fnm>
               </au>
            </aug>
            <source>Microbiology</source>
            <pubdate>2005</pubdate>
            <volume>151</volume>
            <fpage>3381</fpage>
            <lpage>3394</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1099/mic.0.28353-0</pubid>
                  <pubid idtype="pmpid" link="fulltext">16207920</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B15">
            <title>
               <p>Phospholipase B enzyme expression is not associated with other virulence attributes in <it>Candida albicans </it>isolates from patients with human immunodeficiency virus infection.</p>
            </title>
            <aug>
               <au>
                  <snm>Samaranayake</snm>
                  <fnm>YH</fnm>
               </au>
               <au>
                  <snm>Dassanayake</snm>
                  <fnm>RS</fnm>
               </au>
               <au>
                  <snm>Jayatilake</snm>
                  <fnm>JA</fnm>
               </au>
               <au>
                  <snm>Cheung</snm>
                  <fnm>BP</fnm>
               </au>
               <au>
                  <snm>Yau</snm>
                  <fnm>JY</fnm>
               </au>
               <au>
                  <snm>Yeung</snm>
                  <fnm>KW</fnm>
               </au>
               <au>
                  <snm>Samaranayake</snm>
                  <fnm>LP</fnm>
               </au>
            </aug>
            <source>J Med Microbiol</source>
            <pubdate>2005</pubdate>
            <volume>54</volume>
            <fpage>583</fpage>
            <lpage>593</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1099/jmm.0.45762-0</pubid>
                  <pubid idtype="pmpid" link="fulltext">15888468</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B16">
            <title>
               <p>Large-scale essential gene identification in <it>Candida albicans </it>and applications to antifungal drug discovery.</p>
            </title>
            <aug>
               <au>
                  <snm>Roemer</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Jiang</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Davison</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Ketela</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Veillette</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Breton</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Tandia</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Linteau</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Sillaots</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Marta</snm>
                  <fnm>C</fnm>
               </au>
               <etal/>
            </aug>
            <source>Mol Microbiol</source>
            <pubdate>2003</pubdate>
            <volume>50</volume>
            <fpage>167</fpage>
            <lpage>181</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1046/j.1365-2958.2003.03697.x</pubid>
                  <pubid idtype="pmpid" link="fulltext">14507372</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B17">
            <title>
               <p>Large-scale gene function analysis in <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Bruno</snm>
                  <fnm>VM</fnm>
               </au>
               <au>
                  <snm>Mitchell</snm>
                  <fnm>AP</fnm>
               </au>
            </aug>
            <source>Trends Microbiol</source>
            <pubdate>2004</pubdate>
            <volume>12</volume>
            <fpage>157</fpage>
            <lpage>161</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.tim.2004.02.002</pubid>
                  <pubid idtype="pmpid" link="fulltext">15051065</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B18">
            <title>
               <p>Repetitive sequences (RPSs) in the chromosomes of <it>Candida albicans </it>are sandwiched between two novel stretches, HOK and RB2, common to each chromosome.</p>
            </title>
            <aug>
               <au>
                  <snm>Chindamporn</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Nakagawa</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Mizuguchi</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Chibana</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Doi</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Tanaka</snm>
                  <fnm>K</fnm>
               </au>
            </aug>
            <source>Microbiology</source>
            <pubdate>1998</pubdate>
            <volume>144</volume>
            <fpage>849</fpage>
            <lpage>857</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">9579060</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B19">
            <title>
               <p>Isolation, characterization, and sequencing of <it>Candida albicans </it>repetitive sequence element 2.</p>
            </title>
            <aug>
               <au>
                  <snm>Lasker</snm>
                  <fnm>BA</fnm>
               </au>
               <au>
                  <snm>Page</snm>
                  <fnm>LS</fnm>
               </au>
               <au>
                  <snm>Lott</snm>
                  <fnm>TJ</fnm>
               </au>
               <au>
                  <snm>Kobayashi</snm>
                  <fnm>GS</fnm>
               </au>
            </aug>
            <source>Gene</source>
            <pubdate>1992</pubdate>
            <volume>116</volume>
            <fpage>51</fpage>
            <lpage>57</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/0378-1119(92)90628-3</pubid>
                  <pubid idtype="pmpid" link="fulltext">1628844</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B20">
            <title>
               <p>Identification, characterization and sequence of <it>Candida albicans </it>repetitive DNAs Rel-1 and Rel-2.</p>
            </title>
            <aug>
               <au>
                  <snm>Thrash-Bingham</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Gorman</snm>
                  <fnm>JA</fnm>
               </au>
            </aug>
            <source>Curr Genet</source>
            <pubdate>1993</pubdate>
            <volume>23</volume>
            <fpage>455</fpage>
            <lpage>462</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1007/BF00312634</pubid>
                  <pubid idtype="pmpid">8319302</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B21">
            <title>
               <p>Sequence finishing and gene mapping for <it>Candida albicans </it>chromosome 7, and syntenic analysis against <it>Saccharomyces cerevisiae </it>genome.</p>
            </title>
            <aug>
               <au>
                  <snm>Chibana</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Oka</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Nakayama</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Aoyama</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Magee</snm>
                  <fnm>BB</fnm>
               </au>
               <au>
                  <snm>Magee</snm>
                  <fnm>PT</fnm>
               </au>
               <au>
                  <snm>Mikami</snm>
                  <fnm>Y</fnm>
               </au>
            </aug>
            <source>Genetics</source>
            <pubdate>2005</pubdate>
            <volume>170</volume>
            <fpage>1525</fpage>
            <lpage>1537</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1449773</pubid>
                  <pubid idtype="pmpid" link="fulltext">15937140</pubid>
                  <pubid idtype="doi">10.1534/genetics.104.034652</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B22">
            <title>
               <p>Centromeric DNA sequences in the pathogenic yeast <it>Candida albicans </it>are all different and unique.</p>
            </title>
            <aug>
               <au>
                  <snm>Sanyal</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Baum</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Carbon</snm>
                  <fnm>J</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci USA</source>
            <pubdate>2004</pubdate>
            <volume>101</volume>
            <fpage>11374</fpage>
            <lpage>11379</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">509209</pubid>
                  <pubid idtype="pmpid" link="fulltext">15272074</pubid>
                  <pubid idtype="doi">10.1073/pnas.0404318101</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B23">
            <title>
               <p>Isolation and characterization of a repeated sequence (RPS1) of <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Iwaguchi</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Homma</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Chibana</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Tanaka</snm>
                  <fnm>K</fnm>
               </au>
            </aug>
            <source>J Gen Microbiol</source>
            <pubdate>1992</pubdate>
            <volume>138</volume>
            <fpage>1893</fpage>
            <lpage>1900</lpage>
            <xrefbib>
               <pubid idtype="pmpid">1402790</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B24">
            <title>
               <p>Diversity of tandemly repetitive sequences due to short periodic repetitions in the chromosomes of <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Chibana</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Iwaguchi</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Homma</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Chindamporn</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Nakagawa</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Tanaka</snm>
                  <fnm>K</fnm>
               </au>
            </aug>
            <source>J Bacteriol</source>
            <pubdate>1994</pubdate>
            <volume>176</volume>
            <fpage>3851</fpage>
            <lpage>3858</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">205581</pubid>
                  <pubid idtype="pmpid" link="fulltext">8021166</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B25">
            <title>
               <p>Analysis of the chromosomal localization of the repetitive sequences (RPSs) in <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Chindamporn</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Nakagawa</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Homma</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Chibana</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Doi</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Tanaka</snm>
                  <fnm>K</fnm>
               </au>
            </aug>
            <source>Microbiology</source>
            <pubdate>1995</pubdate>
            <volume>141</volume>
            <fpage>469</fpage>
            <lpage>476</lpage>
            <xrefbib>
               <pubid idtype="pmpid">7704277</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B26">
            <title>
               <p>Multiple LTR-retrotransposon families in the asexual yeast <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Goodwin</snm>
                  <fnm>TJ</fnm>
               </au>
               <au>
                  <snm>Poulter</snm>
                  <fnm>RT</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2000</pubdate>
            <volume>10</volume>
            <fpage>174</fpage>
            <lpage>191</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1101/gr.10.2.174</pubid>
                  <pubid idtype="pmpid" link="fulltext">10673276</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B27">
            <title>
               <p>Identification of a gene encoding the pyruvate decarboxylase gene regulator CaPdc2p from <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Kaiser</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Munder</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Saluz</snm>
                  <fnm>HP</fnm>
               </au>
               <au>
                  <snm>Kunkel</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Eck</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <source>Yeast</source>
            <pubdate>1999</pubdate>
            <volume>15</volume>
            <fpage>585</fpage>
            <lpage>591</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/(SICI)1097-0061(199905)15:7&lt;585::AID-YEA401>3.0.CO;2-9</pubid>
                  <pubid idtype="pmpid" link="fulltext">10341421</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B28">
            <title>
               <p>A human-curated annotation of the <it>Candida albicans </it>genome.</p>
            </title>
            <aug>
               <au>
                  <snm>Braun</snm>
                  <fnm>BR</fnm>
               </au>
               <au>
                  <snm>van het Hoog</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>d'Enfert</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Martchenko</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Dungan</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Kuo</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Inglis</snm>
                  <fnm>DO</fnm>
               </au>
               <au>
                  <snm>Uhl</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Hogues</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Berriman</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Lorenz</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Levitin</snm>
                  <fnm>A</fnm>
               </au>
               <etal/>
            </aug>
            <source>PLoS Genet</source>
            <pubdate>2005</pubdate>
            <volume>1</volume>
            <fpage>36</fpage>
            <lpage>57</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1183520</pubid>
                  <pubid idtype="pmpid" link="fulltext">16103911</pubid>
                  <pubid idtype="doi">10.1371/journal.pgen.0010001</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B29">
            <title>
               <p>Complete inventory of ABC proteins in human pathogenic yeast, <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Gaur</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Choudhury</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Prasad</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <source>J Mol Microbiol Biotechnol</source>
            <pubdate>2005</pubdate>
            <volume>9</volume>
            <fpage>3</fpage>
            <lpage>15</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1159/000088141</pubid>
                  <pubid idtype="pmpid">16254441</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B30">
            <title>
               <p>The closely related species <it>Candida albicans </it>and <it>Candida dubliniensis </it>can mate.</p>
            </title>
            <aug>
               <au>
                  <snm>Pujol</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Daniels</snm>
                  <fnm>KJ</fnm>
               </au>
               <au>
                  <snm>Lockhart</snm>
                  <fnm>SR</fnm>
               </au>
               <au>
                  <snm>Srikantha</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Radke</snm>
                  <fnm>JB</fnm>
               </au>
               <au>
                  <snm>Geiger</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Soll</snm>
                  <fnm>DR</fnm>
               </au>
            </aug>
            <source>Eukaryot Cell</source>
            <pubdate>2004</pubdate>
            <volume>3</volume>
            <fpage>1015</fpage>
            <lpage>1027</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">500882</pubid>
                  <pubid idtype="pmpid" link="fulltext">15302834</pubid>
                  <pubid idtype="doi">10.1128/EC.3.4.1015-1027.2004</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B31">
            <title>
               <p>Comparative genomics using <it>Candida albicans </it>DNA microarrays reveals absence and divergence of virulence-associated genes in <it>Candida dubliniensis</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Moran</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Stokes</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Thewes</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Hube</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Coleman</snm>
                  <fnm>DC</fnm>
               </au>
               <au>
                  <snm>Sullivan</snm>
                  <fnm>D</fnm>
               </au>
            </aug>
            <source>Microbiology</source>
            <pubdate>2004</pubdate>
            <volume>150</volume>
            <fpage>3363</fpage>
            <lpage>3382</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1099/mic.0.27221-0</pubid>
                  <pubid idtype="pmpid" link="fulltext">15470115</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B32">
            <title>
               <p>The CARE-2 and rel-2 repetitive elements of <it>Candida albicans </it>contain LTR fragments of a new retrotransposon.</p>
            </title>
            <aug>
               <au>
                  <snm>Goodwin</snm>
                  <fnm>TJ</fnm>
               </au>
               <au>
                  <snm>Poulter</snm>
                  <fnm>RT</fnm>
               </au>
            </aug>
            <source>Gene</source>
            <pubdate>1998</pubdate>
            <volume>218</volume>
            <fpage>85</fpage>
            <lpage>93</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/S0378-1119(98)00362-X</pubid>
                  <pubid idtype="pmpid">9751806</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B33">
            <title>
               <p>Secreted lipases of <it>Candida albicans</it>: cloning, characterisation and expression analysis of a new gene family with at least ten members.</p>
            </title>
            <aug>
               <au>
                  <snm>Hube</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Stehr</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Bossenz</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Mazur</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Kretschmar</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Schafer</snm>
                  <fnm>W</fnm>
               </au>
            </aug>
            <source>Arch Microbiol</source>
            <pubdate>2000</pubdate>
            <volume>174</volume>
            <fpage>362</fpage>
            <lpage>374</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1007/s002030000218</pubid>
                  <pubid idtype="pmpid" link="fulltext">11131027</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B34">
            <title>
               <p>Telomere-telomere recombination is an efficient bypass pathway for telomere maintenance in <it>Saccharomyces cerevisiae</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Teng</snm>
                  <fnm>SC</fnm>
               </au>
               <au>
                  <snm>Zakian</snm>
                  <fnm>VA</fnm>
               </au>
            </aug>
            <source>Mol Cell Biol</source>
            <pubdate>1999</pubdate>
            <volume>19</volume>
            <fpage>8083</fpage>
            <lpage>8093</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">84893</pubid>
                  <pubid idtype="pmpid" link="fulltext">10567534</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B35">
            <title>
               <p>Homologous recombination in <it>Candida albicans</it>: role of CaRad52p in DNA repair, integration of linear DNA fragments and telomere length.</p>
            </title>
            <aug>
               <au>
                  <snm>Ciudad</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Andaluz</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Steinberg-Neifach</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Lue</snm>
                  <fnm>NF</fnm>
               </au>
               <au>
                  <snm>Gow</snm>
                  <fnm>NA</fnm>
               </au>
               <au>
                  <snm>Calderone</snm>
                  <fnm>RA</fnm>
               </au>
               <au>
                  <snm>Larriba</snm>
                  <fnm>G</fnm>
               </au>
            </aug>
            <source>Mol Microbiol</source>
            <pubdate>2004</pubdate>
            <volume>53</volume>
            <fpage>1177</fpage>
            <lpage>1194</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1111/j.1365-2958.2004.04197.x</pubid>
                  <pubid idtype="pmpid" link="fulltext">15306020</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B36">
            <title>
               <p>Virulence-related surface glycoproteins in the yeast pathogen <it>Candida glabrata </it>are encoded in subtelomeric clusters and subject to RAP1- and SIR-dependent transcriptional silencing.</p>
            </title>
            <aug>
               <au>
                  <snm>De Las Penas</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Pan</snm>
                  <fnm>SJ</fnm>
               </au>
               <au>
                  <snm>Castano</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Alder</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Cregg</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Cormack</snm>
                  <fnm>BP</fnm>
               </au>
            </aug>
            <source>Genes Dev</source>
            <pubdate>2003</pubdate>
            <volume>17</volume>
            <fpage>2245</fpage>
            <lpage>2258</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">196462</pubid>
                  <pubid idtype="pmpid" link="fulltext">12952896</pubid>
                  <pubid idtype="doi">10.1101/gad.1121003</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B37">
            <title>
               <p>Construction of an SfiI macrorestriction map of the <it>Candida albicans </it>genome.</p>
            </title>
            <aug>
               <au>
                  <snm>Chu</snm>
                  <fnm>WS</fnm>
               </au>
               <au>
                  <snm>Magee</snm>
                  <fnm>BB</fnm>
               </au>
               <au>
                  <snm>Magee</snm>
                  <fnm>PT</fnm>
               </au>
            </aug>
            <source>J Bacteriol</source>
            <pubdate>1993</pubdate>
            <volume>175</volume>
            <fpage>6637</fpage>
            <lpage>6651</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">206775</pubid>
                  <pubid idtype="pmpid" link="fulltext">8407841</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B38">
            <title>
               <p>Occurrence of ploidy shift in a strain of the imperfect yeast <it>Candida albicans</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Suzuki</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Kanbe</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Kuroiwa</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Tanaka</snm>
                  <fnm>K</fnm>
               </au>
            </aug>
            <source>J Gen Microbiol</source>
            <pubdate>1986</pubdate>
            <volume>132</volume>
            <fpage>443</fpage>
            <lpage>453</lpage>
            <xrefbib>
               <pubid idtype="pmpid">3519859</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B39">
            <title>
               <p>Ca-19 Chromosome Assignment</p>
            </title>
            <url>http://candida.bri.nrc.ca/index.cfm?page=CaChrom</url>
         </bibl>
         <bibl id="B40">
            <title>
               <p>Broad Institute <it>C. albicans </it>Sequencing Project</p>
            </title>
            <url>http://www.broad.mit.edu/annotation/genome/candida_albicans/Info.html</url>
         </bibl>
         <bibl id="B41">
            <title>
               <p>Sequencher 4.7</p>
            </title>
            <url>http://www.sequencher.com</url>
         </bibl>
         <bibl id="B42">
            <title>
               <p>Gene isolation by complementation in <it>Candida albicans </it>and applications to physical and genetic mapping.</p>
            </title>
            <aug>
               <au>
                  <snm>Goshorn</snm>
                  <fnm>AK</fnm>
               </au>
               <au>
                  <snm>Grindle</snm>
                  <fnm>SM</fnm>
               </au>
               <au>
                  <snm>Scherer</snm>
                  <fnm>S</fnm>
               </au>
            </aug>
            <source>Infect Immun</source>
            <pubdate>1992</pubdate>
            <volume>60</volume>
            <fpage>876</fpage>
            <lpage>884</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">257568</pubid>
                  <pubid idtype="pmpid" link="fulltext">1541560</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B43">
            <title>
               <p>Stable propagation of cosmid sized human DNA inserts in an F factor based vector.</p>
            </title>
            <aug>
               <au>
                  <snm>Kim</snm>
                  <fnm>UJ</fnm>
               </au>
               <au>
                  <snm>Shizuya</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>de Jong</snm>
                  <fnm>PJ</fnm>
               </au>
               <au>
                  <snm>Birren</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Simon</snm>
                  <fnm>MI</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>1992</pubdate>
            <volume>20</volume>
            <fpage>1083</fpage>
            <lpage>1085</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">312094</pubid>
                  <pubid idtype="pmpid" link="fulltext">1549470</pubid>
                  <pubid idtype="doi">10.1093/nar/20.5.1083</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B44">
            <title>
               <p>A system of rapid isolation of end-DNA from a small amount of fosmid DNA, with vector-based PCR for chromosome walking.</p>
            </title>
            <aug>
               <au>
                  <snm>Chibana</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Heinecke</snm>
                  <fnm>EL</fnm>
               </au>
               <au>
                  <snm>Beckerman</snm>
                  <fnm>JL</fnm>
               </au>
               <au>
                  <snm>Magee</snm>
                  <fnm>PT</fnm>
               </au>
            </aug>
            <source>Genome</source>
            <pubdate>2001</pubdate>
            <volume>44</volume>
            <fpage>305</fpage>
            <lpage>308</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1139/gen-44-2-305</pubid>
                  <pubid idtype="pmpid" link="fulltext">11341742</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B45">
            <title>
               <p><it>C. albicans </it>Physical Map</p>
            </title>
            <url>http://albicansmap.ahc.umn.edu/</url>
         </bibl>
         <bibl id="B46">
            <title>
               <p>OpGen Technologies</p>
            </title>
            <url>http://www.opgen.com/</url>
         </bibl>
         <bibl id="B47">
            <title>
               <p>CG Database</p>
            </title>
            <url>http://www.candidagenome.org</url>
         </bibl>
         <bibl id="B48">
            <title>
               <p>Green Group</p>
            </title>
            <url>http://www.phrap.org</url>
         </bibl>
      </refgrp>
   </bm>
</art>

