<?xml version='1.0'?>
<!DOCTYPE art SYSTEM 'http://www.biomedcentral.com/xml/article.dtd'>
<art>
   <ui>gb-2010-11-1-r1</ui>
   <ji>GBJ</ji>
   <fm>
      <dochead>Research</dochead>
      <bibl>
         <title>
            <p>Genomic characterization of the <it>Yersinia </it>genus</p>
         </title>
         <aug>
            <au id="A1" ce="yes"><snm>Chen</snm><mi>E</mi><fnm>Peter</fnm><insr iid="I1"/><email>peter.chen@med.navy.mil</email></au>
            <au id="A2" ce="yes"><snm>Cook</snm><fnm>Christopher</fnm><insr iid="I1"/><email>christopher.cook@med.navy.mil</email></au>
            <au id="A3" ce="yes"><snm>Stewart</snm><mi>C</mi><fnm>Andrew</fnm><insr iid="I1"/><email>andrew.stewart@med.navy.mil</email></au>
            <au id="A4"><snm>Nagarajan</snm><fnm>Niranjan</fnm><insr iid="I2"/><insr iid="I7"/><email>ninagarajann@gis.a-star.edu.sg</email></au>
            <au id="A5"><snm>Sommer</snm><mi>D</mi><fnm>Dan</fnm><insr iid="I2"/><email>dsommer@umiacs.umd.edu</email></au>
            <au id="A6"><snm>Pop</snm><fnm>Mihai</fnm><insr iid="I2"/><email>mpop@umiacs.umd.edu</email></au>
            <au id="A7"><snm>Thomason</snm><fnm>Brendan</fnm><insr iid="I1"/><email>bthomason@afmic.detrick.army.mil</email></au>
            <au id="A8"><snm>Thomason</snm><mnm>P Kiley</mnm><fnm>Maureen</fnm><insr iid="I1"/><email>kileyma@mail.nih.gov</email></au>
            <au id="A9"><snm>Lentz</snm><fnm>Shannon</fnm><insr iid="I1"/><email>scourtney12@gmail.com</email></au>
            <au id="A10"><snm>Nolan</snm><fnm>Nichole</fnm><insr iid="I1"/><email>nichole.nolan@med.navy.mil</email></au>
            <au id="A11"><snm>Sozhamannan</snm><fnm>Shanmuga</fnm><insr iid="I1"/><email>shanmuga.sozhamannan@med.navy.mil</email></au>
            <au id="A12"><snm>Sulakvelidze</snm><fnm>Alexander</fnm><insr iid="I3"/><email>asulakvelidze@ufl.edu</email></au>
            <au id="A13"><snm>Mateczun</snm><fnm>Alfred</fnm><insr iid="I1"/><email>alfred.mateczun@med.navy.mil</email></au>
            <au id="A14"><snm>Du</snm><fnm>Lei</fnm><insr iid="I4"/><email>lei.du@roche.com</email></au>
            <au id="A15"><snm>Zwick</snm><mi>E</mi><fnm>Michael</fnm><insr iid="I1"/><insr iid="I5"/><email>mzwick@emory.edu</email></au>
            <au ca="yes" id="A16"><snm>Read</snm><mi>D</mi><fnm>Timothy</fnm><insr iid="I1"/><insr iid="I5"/><insr iid="I6"/><email>tread@emory.edu</email></au>
         </aug>
         <insg>
            <ins id="I1"><p>Biological Defense Research Directorate, Naval Medical Research Center, 503 Robert Grant Avenue, Silver Spring, Maryland 20910, USA</p></ins>
            <ins id="I2"><p>University of Maryland Institute for Advanced Computer Sciences, Center for Bioinformatics and Computational Biology, University of Maryland, College Park, Maryland 20742, USA</p></ins>
            <ins id="I3"><p>Emerging Pathogens Institute and Department of Molecular Genetics and Microbiology, University of Florida College of Medicine, Gainesville, Florida 32610, USA</p></ins>
            <ins id="I4"><p>454 Life Sciences Inc., 15 Commercial Street, Branford, Connecticut 06405, USA</p></ins>
            <ins id="I5"><p>Department of Human Genetics, Emory University School of Medicine, 615 Michael Street, Atlanta, Georgia 30322, USA</p></ins>
            <ins id="I6"><p>Division of Infectious Diseases, Emory University School of Medicine, 615 Michael Street, Atlanta, Georgia 30322, USA</p></ins>
            <ins id="I7"><p>Current address: Computational and Mathematical Biology, Genome Institute of Singapore, Singapore-127726</p></ins>
         </insg>
         <source>Genome Biology</source>
         <issn>1465-6906</issn>
         <pubdate>2010</pubdate>
         <volume>11</volume>
         <issue>1</issue>
         <fpage>R1</fpage>
         <url>http://genomebiology.com/2010/11/1/R1</url>
         <xrefbib><pubidlist><pubid idtype="pmpid">20047673</pubid><pubid idtype="doi">10.1186/gb-2010-11-1-r1</pubid></pubidlist></xrefbib>
      </bibl>
      <history><rec><date><day>23</day><month>5</month><year>2009</year></date></rec><revrec><date><day>7</day><month>10</month><year>2009</year></date></revrec><acc><date><day>4</day><month>1</month><year>2010</year></date></acc><pub><date><day>4</day><month>1</month><year>2010</year></date></pub></history>
      <cpyrt><year>2010</year><collab>Chen et al.; licensee BioMed Central Ltd.</collab><note>This is an open access article distributed under the terms of the Creative Commons Attribution License (<url>http://creativecommons.org/licenses/by/2.0</url>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</note></cpyrt>
      <shorttitle>
         <p><it>Yersinia </it>genomics</p>
      </shorttitle>
      <shortabs>
         <p>Comparative <it>Yersinia </it>genomics identifies features responsible for the colonization of specific host habitats and the horizontal transfer of virulence determinants.</p>
      </shortabs>
      <abs>
         <sec>
            <st>
               <p>Abstract</p>
            </st>
            <sec>
               <st>
                  <p>Background</p>
               </st>
               <p>New DNA sequencing technologies have enabled detailed comparative genomic analyses of entire genera of bacterial pathogens. Prior to this study, three species of the enterobacterial genus <it>Yersinia </it>that cause invasive human diseases (<it>Yersinia pestis</it>, <it>Yersinia pseudotuberculosis</it>, and <it>Yersinia enterocolitica</it>) had been sequenced. However, there were no genomic data on the <it>Yersinia </it>species with more limited virulence potential, frequently found in soil and water environments.</p>
            </sec>
            <sec>
               <st>
                  <p>Results</p>
               </st>
               <p>We used high-throughput sequencing-by-synthesis instruments to obtain 25- to 42-fold average redundancy, whole-genome shotgun data from the type strains of eight species: <it>Y. aldovae</it>, <it>Y. bercovieri</it>, <it>Y. frederiksenii</it>, <it>Y. kristensenii</it>, <it>Y. intermedia</it>, <it>Y. mollaretii</it>, <it>Y. rohdei</it>, and <it>Y. ruckeri</it>. The deepest branching species in the genus, <it>Y. ruckeri</it>, causative agent of red mouth disease in fish, has the smallest genome (3.7 Mb), although it shares the same core set of approximately 2,500 genes as the other members of the genus, whose genomes range in size from 4.3 to 4.8 Mb. <it>Yersinia </it>genomes had a similar global partition of protein functions, as measured by the distribution of Cluster of Orthologous Groups families. Genome to genome variation in islands with genes encoding functions such as ureases, hydrogenases and B-12 cofactor metabolite reactions may reflect adaptations to colonizing specific host habitats.</p>
            </sec>
            <sec>
               <st>
                  <p>Conclusions</p>
               </st>
               <p>Rapid high-quality draft sequencing was used successfully to compare pathogenic and non-pathogenic members of the <it>Yersinia </it>genus. This work underscores the importance of the acquisition of horizontally transferred genes in the evolution of <it>Y. pestis </it>and points to virulence determinants that have been gained and lost on multiple occasions in the history of the genus.</p>
            </sec>
         </sec>
      </abs>
   </fm>
   <meta>
      <classifications>
         <classification id="30010008" subtype="man_spc_id" type="BMC">Evolution</classification>
         <classification id="300100010" subtype="man_spc_id" type="BMC">Genome studies</classification>
         <classification id="300100014" subtype="man_spc_id" type="BMC">Microbiology and parasitology</classification>
      </classifications>
   </meta>
   <bdy>
      <sec>
         <st>
            <p>Background</p>
         </st>
         <p>Of the millions of species of bacteria that live on this planet, only a very small percentage cause serious human diseases <abbrgrp><abbr bid="B1">1</abbr></abbrgrp>. Comparative genetic studies are revealing that many pathogens have only recently emerged from protean environmental, commensal or zoonotic populations <abbrgrp><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr><abbr bid="B4">4</abbr><abbr bid="B5">5</abbr></abbrgrp>. For a variety of reasons, most research effort has been focused on characterizing these pathogens, while their closely related non-pathogenic relatives have only been lightly studied. As a result, our understanding of the population biology of these clades remains biased, limiting our knowledge of the evolution of virulence and our ability to design reliable assays that distinguish pathogen signatures from the background in the clinic and environment <abbrgrp><abbr bid="B6">6</abbr></abbrgrp>.</p>
         <p>The recent development of second generation sequencing platforms (reviewed by Mardis <abbrgrp><abbr bid="B7">7</abbr><abbr bid="B8">8</abbr></abbrgrp> and Shendure <abbrgrp><abbr bid="B7">7</abbr><abbr bid="B8">8</abbr></abbrgrp>) offers an opportunity to change the direction of microbial genomics, enabling the rapid genome sequencing of large numbers of both pathogenic and non-pathogenic strains. Here we describe the deployment of new sequencing technology to extensively sample eight genomes from the <it>Yersinia </it>genus of the family Enterobacteriaceae. The first published sequencing studies on the <it>Yersinia </it>genus have focused exclusively on invasive human disease-causing species that included five <it>Yersinia pestis </it>genome sequences (one of which, strain 91001, is from the avirulent 'microtus' biovar) <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B10">10</abbr><abbr bid="B11">11</abbr><abbr bid="B12">12</abbr></abbrgrp>, two <it>Yersinia pseudotuberculosis </it><abbrgrp><abbr bid="B13">13</abbr><abbr bid="B14">14</abbr></abbrgrp> and one <it>Yersinia enterocolitica </it>biotype 1B <abbrgrp><abbr bid="B15">15</abbr></abbrgrp>. Primarily a zoonotic pathogen, <it>Y. pestis</it>, the causative agent of bubonic plague and a category A select agent, is a recently emerged lineage that has since undergone global expansion <abbrgrp><abbr bid="B2">2</abbr></abbrgrp>. Following introduction into a human through flea bite <abbrgrp><abbr bid="B16">16</abbr></abbrgrp>, <it>Y. pestis </it>is engulfed by macrophages and taken to the regional lymph nodes. <it>Y. pestis </it>then escapes the macrophages and multiplies to cause a highly lethal bacteremia if untreated with antibiotics. <it>Y. pseudotuberculosis </it>and <it>Y. enterocolitica </it>(primarily biotype 1B) are enteropathogens that cause gastroenteritis following ingestion and translocation of the Peyer's patches. Like <it>Y. pestis</it>, the enteropathogenic Yersiniae can escape macrophages and multiply outside host cells, but unlike their more virulent congener, they usually cause only self-limiting inflammatory diseases.</p>
         <p>The generally accepted pathway for the evolution of these more severe disease-causing Yersiniae is memorably encapsulated by the recipe, 'add DNA, stir, reduce' <abbrgrp><abbr bid="B17">17</abbr></abbrgrp>. In each species DNA has been 'added' by horizontal gene transfer in the form of plasmids and genomic islands. All three human pathogens carry a 70-kb pYV virulence plasmid (also known as pCD), which carries the Ysc type III secretion system and Yops effectors <abbrgrp><abbr bid="B18">18</abbr><abbr bid="B19">19</abbr><abbr bid="B20">20</abbr></abbrgrp>, that is not detected in non-pathogenic species. <it>Y. pestis </it>also has two additional plasmids, pMT (also known as pFra), containing the F1 capsule-like antigen and murine toxin, and pPla (also known as pPCP1), which carries plasminogen-activating factor, Pla. <it>Y. pestis</it>, <it>Y. pseudotuberculosis</it>, and biotype 1B <it>Y. enterocolitica </it>also contain a chromosomally located, mobile, high-pathogenicity island (HPI) <abbrgrp><abbr bid="B21">21</abbr></abbrgrp>. The HPI includes a cluster of genes for biosynthesis of yersiniabactin, an iron-binding siderophore necessary for systemic infection <abbrgrp><abbr bid="B22">22</abbr></abbrgrp>. 'Stir' refers to intra-genomic change, notably the recent expansion of insertion sequences (IS) within <it>Y. pestis </it>(3.7% of the <it>Y. pestis </it>CO92 genome <abbrgrp><abbr bid="B9">9</abbr></abbrgrp>) and a high level of genome structural variation <abbrgrp><abbr bid="B23">23</abbr></abbrgrp>. 'Reduce' describes the loss of functions via deletions and pseudogene accumulation in <it>Y. pestis </it><abbrgrp><abbr bid="B9">9</abbr><abbr bid="B13">13</abbr></abbrgrp> due to shifts in selection pressure caused by the transition from <it>Y. pseudotuberculosis</it>-like enteropathogenicity to a flea-borne transmission cycle. This description of <it>Y. pestis </it>evolution is, of course, oversimplified. <it>Y. 
pestis </it>strains show considerable diversity at the phenotypic level and there is evidence of acquisition of plasmids and other horizontally transferred genes [<abbrgrp><abbr bid="B12">12</abbr><abbr bid="B24">24</abbr><abbr bid="B25">25</abbr></abbrgrp> DNA microarray, <abbrgrp><abbr bid="B26">26</abbr><abbr bid="B27">27</abbr></abbrgrp>].</p>
         <p>While most attention is focused on the three well-known human pathogens, several other, less familiar <it>Yersinia </it>species have been split off from <it>Y. enterocolitica </it>over the past 40 years based on biochemistry, serology and 16S rRNA sequence <abbrgrp><abbr bid="B28">28</abbr><abbr bid="B29">29</abbr></abbrgrp>. <it>Y. ruckeri </it>is an agriculturally important fish pathogen that is a cause of 'red mouth' disease in salmonid fish. The species has sufficient phylogenetic divergence from the rest of the <it>Yersinia </it>genus to stir controversy about its taxonomic assignment <abbrgrp><abbr bid="B30">30</abbr></abbrgrp>. <it>Y. frederiksenii</it>, <it>Y. kristensenii</it>, <it>Y. intermedia</it>, <it>Y. mollaretii</it>, <it>Y. bercovieri</it>, and <it>Y. rohdei </it>have been isolated from human feces, fresh water, animal feces and intestines and foods <abbrgrp><abbr bid="B28">28</abbr></abbrgrp>. There have been reports associating some of the species with human diarrheal infections <abbrgrp><abbr bid="B31">31</abbr></abbrgrp> and lethality for mice <abbrgrp><abbr bid="B32">32</abbr></abbrgrp>. <it>Y. aldovae </it>is most often isolated from fresh water but has also been cultured from fish and the alimentary tracts of wild rodents <abbrgrp><abbr bid="B33">33</abbr></abbrgrp>. There is no report of isolation of <it>Y. aldovae </it>from human feces or urine <abbrgrp><abbr bid="B28">28</abbr></abbrgrp>.</p>
         <p>Using microbead-based, massively parallel sequencing by synthesis <abbrgrp><abbr bid="B34">34</abbr></abbrgrp> we rapidly and economically obtained high redundancy genome sequence of the type strains of each of these eight lesser known <it>Yersinia </it>species. From these genome sequences, we were able to determine the core gene set that defines the <it>Yersinia </it>genus and to look for clues to distinguish the genomes of human pathogens from less virulent strains.</p>
      </sec>
      <sec>
         <st>
            <p>Results</p>
         </st>
         <sec>
            <st>
               <p>High-redundancy draft genome sequences of eight <it>Yersinia </it>species</p>
            </st>
            <p>Whole genome shotgun coverage of eight previously unsequenced <it>Yersinia </it>species (Table <tblr tid="T1">1</tblr>) was obtained by single-end bead-based pyrosequencing <abbrgrp><abbr bid="B34">34</abbr></abbrgrp> using the 454 Life Sciences GS-20 instrument. Each of the eight genomes was sequenced to a high level of redundancy (between 25 and 44 sequencing reads per base) and assembled <it>de novo </it>into large contigs (Table <tblr tid="T2">2</tblr>; Additional file <supplr sid="S1">1</supplr>). Excluding contigs that covered repeat regions and therefore had significantly increased copy number, the quality of the sequence of the draft assemblies was high, with less than 0.1% of the sequence of each genome having a consensus quality score <abbrgrp><abbr bid="B35">35</abbr></abbrgrp> less than 40. Moreover, a more recent assessment of quality of GS-20 data suggests that the scores generated by the 454 Life Sciences software are an underestimation of the true sequence quality <abbrgrp><abbr bid="B36">36</abbr></abbrgrp>. The most common sequencing error encountered when assembling pyrosequencing data is the rare calling of incorrect numbers of homopolymers caused by variation in the intensity of fluorescence emitted upon extension with the labeled nucleoside <abbrgrp><abbr bid="B34">34</abbr></abbrgrp>.</p>
            <tbl id="T1"><title><p>Table 1</p></title><caption><p>Strains sequenced in this study</p></caption><tblbdy cols="8">
      <r>
         <c ca="left">
            <p>
               <b>Species</b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>ATCC number</b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>Other designations</b>
            </p>
         </c>
         <c ca="center">
            <p>
               <b>Year isolated</b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>Location isolated</b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>Description</b>
            </p>
         </c>
         <c ca="center">
            <p>
               <b>Optimum growth temperature</b>
            </p>
         </c>
         <c ca="center">
            <p>
               <b>Reference</b>
            </p>
         </c>
      </r>
      <r>
         <c cspan="8">
            <hr/>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. aldovae</it>
            </p>
         </c>
         <c ca="left">
            <p>35236T</p>
         </c>
         <c ca="left">
            <p>CNY 6065</p>
         </c>
         <c ca="center">
            <p>NR</p>
         </c>
         <c ca="left">
            <p>Czechoslovakia</p>
         </c>
         <c ca="left">
            <p>Drinking water</p>
         </c>
         <c ca="center">
            <p>26&#176;C</p>
         </c>
         <c ca="center">
            <p>
               <abbrgrp>
                  <abbr bid="B100">100</abbr>
               </abbrgrp>
            </p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. bercovieri</it>
            </p>
         </c>
         <c ca="left">
            <p>43970T</p>
         </c>
         <c ca="left">
            <p>CDC 2475-87</p>
         </c>
         <c ca="center">
            <p>NR</p>
         </c>
         <c ca="left">
            <p>France</p>
         </c>
         <c ca="left">
            <p>Human stool</p>
         </c>
         <c ca="center">
            <p>26&#176;C</p>
         </c>
         <c ca="center">
            <p>
               <abbrgrp>
                  <abbr bid="B101">101</abbr>
               </abbrgrp>
            </p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. frederiksenii</it>
            </p>
         </c>
         <c ca="left">
            <p>33641T</p>
         </c>
         <c ca="left">
            <p>CDC 1461-81, CIP 80-29</p>
         </c>
         <c ca="center">
            <p>NR</p>
         </c>
         <c ca="left">
            <p>Denmark</p>
         </c>
         <c ca="left">
            <p>Sewage</p>
         </c>
         <c ca="center">
            <p>26&#176;C</p>
         </c>
         <c ca="center">
            <p>
               <abbrgrp>
                  <abbr bid="B102">102</abbr>
               </abbrgrp>
            </p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. intermedia</it>
            </p>
         </c>
         <c ca="left">
            <p>29909T</p>
         </c>
         <c ca="left">
            <p>CIP 80-28</p>
         </c>
         <c ca="center">
            <p>NR</p>
         </c>
         <c ca="left">
            <p>NR</p>
         </c>
         <c ca="left">
            <p>Human urine</p>
         </c>
         <c ca="center">
            <p>37&#176;C</p>
         </c>
         <c ca="center">
            <p>
               <abbrgrp>
                  <abbr bid="B103">103</abbr>
               </abbrgrp>
            </p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. kristensenii</it>
            </p>
         </c>
         <c ca="left">
            <p>33638T</p>
         </c>
         <c ca="left">
            <p>CIP 80-30</p>
         </c>
         <c ca="center">
            <p>NR</p>
         </c>
         <c ca="left">
            <p>NR</p>
         </c>
         <c ca="left">
            <p>Human urine</p>
         </c>
         <c ca="center">
            <p>26&#176;C</p>
         </c>
         <c ca="center">
            <p>
               <abbrgrp>
                  <abbr bid="B104">104</abbr>
               </abbrgrp>
            </p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. mollaretii</it>
            </p>
         </c>
         <c ca="left">
            <p>43969T</p>
         </c>
         <c ca="left">
            <p>CDC 2465-87</p>
         </c>
         <c ca="center">
            <p>NR</p>
         </c>
         <c ca="left">
            <p>USA</p>
         </c>
         <c ca="left">
            <p>Soil</p>
         </c>
         <c ca="center">
            <p>26&#176;C</p>
         </c>
         <c ca="center">
            <p>
               <abbrgrp>
                  <abbr bid="B101">101</abbr>
               </abbrgrp>
            </p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. rohdei</it>
            </p>
         </c>
         <c ca="left">
            <p>43380T</p>
         </c>
         <c ca="left">
            <p>H271-36/78, CDC 3022-85</p>
         </c>
         <c ca="center">
            <p>1978</p>
         </c>
         <c ca="left">
            <p>Germany</p>
         </c>
         <c ca="left">
            <p>Dog feces</p>
         </c>
         <c ca="center">
            <p>26&#176;C</p>
         </c>
         <c ca="center">
            <p>
               <abbrgrp>
                  <abbr bid="B105">105</abbr>
               </abbrgrp>
            </p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. ruckeri</it>
            </p>
         </c>
         <c ca="left">
            <p>29473T</p>
         </c>
         <c ca="left">
            <p>2396-61</p>
         </c>
         <c ca="center">
            <p>1961</p>
         </c>
         <c ca="left">
            <p>Idaho, USA</p>
         </c>
         <c ca="left">
            <p>Rainbow trout (<it>Oncorhynchus mykiss</it>) with red mouth disease</p>
         </c>
         <c ca="center">
            <p>26&#176;C</p>
         </c>
         <c ca="center">
            <p>
               <abbrgrp>
                  <abbr bid="B67">67</abbr>
               </abbrgrp>
            </p>
         </c>
      </r>
   </tblbdy><tblfn>
      <p>NR, not reported in reference publication.</p>
   </tblfn></tbl>
            <tbl id="T2"><title><p>Table 2</p></title><caption><p>Genomes summary</p></caption><tblbdy cols="9">
      <r>
         <c ca="left">
            <p>
               <b>Species</b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>Type strain</b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>NCBI project ID</b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>GenBank accession number</b>
            </p>
         </c>
         <c ca="center">
            <p>
               <b>Total reads</b>
            </p>
         </c>
         <c ca="center">
            <p>
               <b>Number of contigs >500 nt</b>
            </p>
         </c>
         <c ca="center">
            <p>
               <b>Total length of large contigs</b>
            </p>
         </c>
         <c ca="center">
            <p>
               <b>% large contigs &lt;Q40</b>
            </p>
         </c>
         <c ca="center">
            <p>
               <b>Number of contigs aligned to chromosomal scaffold</b>
            </p>
         </c>
      </r>
      <r>
         <c cspan="9">
            <hr/>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. rohdei</it>
            </p>
         </c>
         <c ca="left">
            <p>ATCC_43380</p>
         </c>
         <c ca="left">
            <p>29767</p>
         </c>
         <c ca="left">
            <p>[Genbank:<ext-link ext-link-type="gen" ext-link-id="ACCD00000000">ACCD00000000</ext-link>]</p>
         </c>
         <c ca="center">
            <p>991,106</p>
         </c>
         <c ca="center">
            <p>83</p>
         </c>
         <c ca="center">
            <p>4,303,720</p>
         </c>
         <c ca="center">
            <p>0.11</p>
         </c>
         <c ca="center">
            <p>60</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. ruckeri</it>
            </p>
         </c>
         <c ca="left">
            <p>ATCC_29473</p>
         </c>
         <c ca="left">
            <p>29769</p>
         </c>
         <c ca="left">
            <p>[Genbank:<ext-link ext-link-type="gen" ext-link-id="ACCC00000000">ACCC00000000</ext-link>]</p>
         </c>
         <c ca="center">
            <p>1,347,304</p>
         </c>
         <c ca="center">
            <p>103</p>
         </c>
         <c ca="center">
            <p>3,716,658</p>
         </c>
         <c ca="center">
            <p>0.004</p>
         </c>
         <c ca="center">
            <p>68</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. aldovae</it>
            </p>
         </c>
         <c ca="left">
            <p>ATCC_35236</p>
         </c>
         <c ca="left">
            <p>29741</p>
         </c>
         <c ca="left">
            <p>[Genbank:<ext-link ext-link-type="gen" ext-link-id="ACCB00000000">ACCB00000000</ext-link>]</p>
         </c>
         <c ca="center">
            <p>1,125,002</p>
         </c>
         <c ca="center">
            <p>104</p>
         </c>
         <c ca="center">
            <p>4,277,123</p>
         </c>
         <c ca="center">
            <p>0.006</p>
         </c>
         <c ca="center">
            <p>60</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. kristensenii</it>
            </p>
         </c>
         <c ca="left">
            <p>ATCC_33638</p>
         </c>
         <c ca="left">
            <p>29761</p>
         </c>
         <c ca="left">
            <p>[Genbank:<ext-link ext-link-type="gen" ext-link-id="ACCA00000000">ACCA00000000</ext-link>]</p>
         </c>
         <c ca="center">
            <p>1,374,452</p>
         </c>
         <c ca="center">
            <p>86</p>
         </c>
         <c ca="center">
            <p>4,637,246</p>
         </c>
         <c ca="center">
            <p>0.003</p>
         </c>
         <c ca="center">
            <p>63</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. intermedia</it>
            </p>
         </c>
         <c ca="left">
            <p>ATCC_29909</p>
         </c>
         <c ca="left">
            <p>29755</p>
         </c>
         <c ca="left">
            <p>[Genbank:<ext-link ext-link-type="gen" ext-link-id="AALF00000000">AALF00000000</ext-link>]</p>
         </c>
         <c ca="center">
            <p>1,768,909</p>
         </c>
         <c ca="center">
            <p>74</p>
         </c>
         <c ca="center">
            <p>4,684,150</p>
         </c>
         <c ca="center">
            <p>0.003</p>
         </c>
         <c ca="center">
            <p>68</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. frederiksenii</it>
            </p>
         </c>
         <c ca="left">
            <p>ATCC_33641</p>
         </c>
         <c ca="left">
            <p>29743</p>
         </c>
         <c ca="left">
            <p>[Genbank:<ext-link ext-link-type="gen" ext-link-id="AALE00000000">AALE00000000</ext-link>]</p>
         </c>
         <c ca="center">
            <p>1,504,985</p>
         </c>
         <c ca="center">
            <p>90</p>
         </c>
         <c ca="center">
            <p>4,864,031</p>
         </c>
         <c ca="center">
            <p>0.005</p>
         </c>
         <c ca="center">
            <p>56</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. mollaretii</it>
            </p>
         </c>
         <c ca="left">
            <p>ATCC_43969</p>
         </c>
         <c ca="left">
            <p>16105</p>
         </c>
         <c ca="left">
            <p>[Genbank:<ext-link ext-link-type="gen" ext-link-id="AALD00000000">AALD00000000</ext-link>]</p>
         </c>
         <c ca="center">
            <p>1,825,876</p>
         </c>
         <c ca="center">
            <p>110</p>
         </c>
         <c ca="center">
            <p>4,535,932</p>
         </c>
         <c ca="center">
            <p>0.003</p>
         </c>
         <c ca="center">
            <p>80</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. bercovieri</it>
            </p>
         </c>
         <c ca="left">
            <p>ATCC_43970</p>
         </c>
         <c ca="left">
            <p>16104</p>
         </c>
         <c ca="left">
            <p>[Genbank:<ext-link ext-link-type="gen" ext-link-id="AALC00000000">AALC00000000</ext-link>]</p>
         </c>
         <c ca="center">
            <p>1,263,275</p>
         </c>
         <c ca="center">
            <p>144</p>
         </c>
         <c ca="center">
            <p>4,316,521</p>
         </c>
         <c ca="center">
            <p>0.006</p>
         </c>
         <c ca="center">
            <p>91</p>
         </c>
      </r>
   </tblbdy></tbl>
			<suppl id="S1">
            <title>
               <p>Additional file 1</p>
            </title>
            <caption>
               <p>Statistics from DIYA and frameshift detection programs on eight genomes sequenced in this study and other enterobacterial genomes from NCBI</p>
            </caption>
            <text>
               <p>Statistics from running DIYA <abbrgrp><abbr bid="B89">89</abbr></abbrgrp> and frameshift detection programs on the eight genomes sequenced in this study and various other enterobacterial genomes downloaded from NCBI.</p>
            </text>
            <file name="gb-2010-11-1-r1-S1.xls">
   <p>Click here for file</p>
</file>
			</suppl>
			<p>Previous studies and our experience suggest that at this level of sequence coverage the assembly gaps fall in repeat regions that cannot be spanned by single-end sequence reads (average length 109 nucleotides in this study) <abbrgrp><abbr bid="B34">34</abbr></abbrgrp>. Fewer RNA genes are observed compared to published <it>Yersinia </it>genomes finished using traditional Sanger sequencing technology (Additional file <supplr sid="S1">1</supplr>), reflecting the greater difficulty of uniquely assembling repetitive sequences with single-end reads. We assessed the quality of our assemblies using metrics implemented in the <it>amosvalidate </it>package <abbrgrp><abbr bid="B37">37</abbr></abbrgrp>. Specifically, we focused on three measures frequently correlated with assembly errors: density of polymorphisms within assembled reads, depth of coverage, and breakpoints in the alignment of unassembled reads to the final assembly. Regions in each genome where at least one measure suggested a possible mis-assembly were validated by manual inspection (Additional file <supplr sid="S2">2</supplr>). Many of the suspect regions corresponded to collapsed repeats, where the location of individual members of the repeat family within the genome could not be accurately determined. Based on the results of the <it>amosvalidate </it>analysis and the optical map alignment we found no evidence of mis-assemblies leading to chimeric contigs in the eight genomes we sequenced. Genomic regions flagged by the <it>amosvalidate </it>package are made available in GFF format (compatible with most genome browsers) in Additional file <supplr sid="S3">3</supplr>.</p>
			<suppl id="S2">
            <title>
               <p>Additional file 2</p>
            </title>
            <caption>
               <p>Results of <it>amosvalidate </it>analysis on the eight genomes of this study</p>
            </caption>
            <text>
               <p>Results of <it>amosvalidate </it><abbrgrp><abbr bid="B37">37</abbr></abbrgrp> analysis on the eight genomes of this study.</p>
            </text>
            <file name="gb-2010-11-1-r1-S2.doc">
   <p>Click here for file</p>
</file>
         </suppl>
		 <suppl id="S3">
            <title>
               <p>Additional file 3</p>
            </title>
            <caption>
               <p>Additional annotation files</p>
            </caption>
            <text>
               <p>These consist of ISfinder <abbrgrp><abbr bid="B40">40</abbr></abbrgrp>, RepeatScout <abbrgrp><abbr bid="B41">41</abbr></abbrgrp> and <it>amosvalidate </it><abbrgrp><abbr bid="B37">37</abbr></abbrgrp> results (GFF format); repeats found by RepeatScout in fasta format, scaffold files (NCBI AGP format); and information about length of contigs, read count, estimated repeat number, count in scaffold and whether or not the contig was placed by SOMA <abbrgrp><abbr bid="B39">39</abbr></abbrgrp>.</p>
            </text>
            <file name="gb-2010-11-1-r1-S3.gz">
   <p>Click here for file</p>
</file>
         </suppl>
            <p>Genome sizes were estimated initially as the sum of the sizes of the contigs from the shotgun assembly, with corrections for contigs representing collapsed repeats (Table <tblr tid="T2">2</tblr>). We also derived an independent estimate for the genome size from the whole-genome optical restriction mapping of the species <abbrgrp><abbr bid="B38">38</abbr></abbrgrp> (Additional file <supplr sid="S4">4</supplr>). Alignment of contigs to the optical maps <abbrgrp><abbr bid="B39">39</abbr></abbrgrp> suggested that the optical maps consistently overestimated sizes (2 to 10% on average). After correction, the map-based estimates and sequence-based estimates agreed well (within 7%). Two species, <it>Y. aldovae </it>(4.22 to 4.33 Mbp) and <it>Y. ruckeri </it>(3.58 to 3.89 Mbp), have a substantially reduced total genome size compared with the 4.6 to 4.8 Mbp seen in the genus generally. The agreement between the optical maps and sequence-based estimates of genome sizes tallied with experimental evidence for the lack of large plasmids in the sequenced genomes (Additional file <supplr sid="S5">5</supplr>). A screen for matches to known plasmid genes produced only a few candidate plasmid contigs, totaling less than 10 kbp of sequence in each genome.</p>
		<suppl id="S4">
            <title>
               <p>Additional file 4</p>
            </title>
            <caption>
               <p>Estimates for genome sizes (in Mbp) based on optical map data</p>
            </caption>
            <text>
               <p>Estimates for genome sizes (in Mbp) based on optical map data.</p>
            </text>
            <file name="gb-2010-11-1-r1-S4.doc">
   <p>Click here for file</p>
</file>
         </suppl>
         <suppl id="S5">
            <title>
               <p>Additional file 5</p>
            </title>
            <caption>
               <p>Pulsed field gel analysis of the eight sequenced <it>Yersinia </it>species and failure to detect plasmids</p>
            </caption>
            <text>
               <p>An <it>E. coli </it>strain with known plasmids was a positive control.</p>
            </text>
            <file name="gb-2010-11-1-r1-S5.doc">
   <p>Click here for file</p>
</file>
         </suppl>	
            <p>The number of IS elements per genome for the eight species (12 to 167 matches) discovered using the ISfinder database <abbrgrp><abbr bid="B40">40</abbr></abbrgrp> was much lower than in the <it>Y. pestis </it>genome (1,147 matches; copy number estimates took into account the possibility of mis-assembly and were accordingly adjusted; see Methods). Furthermore, the non-pathogenic species with the most IS matches, namely <it>Y. bercovieri </it>(167 matches), <it>Y. aldovae </it>(143 matches) and <it>Y. ruckeri </it>(136 matches), have comparatively smaller genomes. We also searched for novel repeat families using a <it>de novo </it>repeat-finder <abbrgrp><abbr bid="B41">41</abbr></abbrgrp> and collected a non-redundant set of 44 repeat sequence families in the <it>Yersinia </it>genus (Table <tblr tid="T3">3</tblr>; Additional file <supplr sid="S6">6</supplr>). Interestingly, the well-known ERIC element <abbrgrp><abbr bid="B42">42</abbr></abbrgrp> was recovered by our <it>de novo </it>search and was found to be present in many copies in all the pathogenic species, but was relatively rare in the non-pathogenic ones. On the other hand, a similar and recently discovered element, YPAL <abbrgrp><abbr bid="B43">43</abbr></abbrgrp> (also recovered by the <it>de novo </it>search), was abundant in all the <it>Yersinia </it>genomes except the fish pathogen <it>Y. ruckeri</it>. Insertion sequence IS1541C in the ISfinder database, which has expanded in <it>Y. pestis </it>(to more than 60 copies), had only a handful of strong matches in <it>Y. enterocolitica</it>, <it>Y. pseudotuberculosis</it>, and <it>Y. bercovieri </it>and no discernible matches in the other <it>Yersinia </it>genomes.</p>
            <tbl id="T3"><title><p>Table 3</p></title><caption><p>Distribution of common repeat sequences</p></caption><tblbdy cols="6">
      <r>
         <c>
            <p/>
         </c>
         <c ca="right">
            <p>
               <b>ERIC</b>
            </p>
            <p>
               <b>(127 bp)</b>
            </p>
         </c>
         <c ca="right">
            <p>
               <b>YPAL</b>
            </p>
            <p>
               <b>(167 bp)</b>
            </p>
         </c>
         <c ca="right">
            <p>
               <b><it>Kristensenii </it>39</b>
            </p>
            <p>
               <b>(142 bp)</b>
            </p>
         </c>
         <c ca="right">
            <p>
               <b>IS1541C</b>
            </p>
            <p>
               <b>(708 bp)</b>
            </p>
         </c>
         <c ca="right">
            <p>
               <b>Aldovae3</b>
            </p>
            <p>
               <b>(154 bp)</b>
            </p>
         </c>
      </r>
      <r>
         <c cspan="6">
            <hr/>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>E. coli</it>
            </p>
         </c>
         <c ca="right">
            <p>0</p>
         </c>
         <c ca="right">
            <p>3</p>
         </c>
         <c ca="right">
            <p>5</p>
         </c>
         <c ca="right">
            <p>0</p>
         </c>
         <c ca="right">
            <p>5</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. pestis</it>
            </p>
         </c>
         <c ca="right">
            <p>54</p>
         </c>
         <c ca="right">
            <p>43</p>
         </c>
         <c ca="right">
            <p>33</p>
         </c>
         <c ca="right">
            <p>61</p>
         </c>
         <c ca="right">
            <p>38</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. pseudotuberculosis</it>
            </p>
         </c>
         <c ca="right">
            <p>55</p>
         </c>
         <c ca="right">
            <p>52</p>
         </c>
         <c ca="right">
            <p>29</p>
         </c>
         <c ca="right">
            <p>5</p>
         </c>
         <c ca="right">
            <p>36</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. enterocolitica</it>
            </p>
         </c>
         <c ca="right">
            <p>63</p>
         </c>
         <c ca="right">
            <p>144</p>
         </c>
         <c ca="right">
            <p>100</p>
         </c>
         <c ca="right">
            <p>3</p>
         </c>
         <c ca="right">
            <p>75</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. aldovae</it>
            </p>
         </c>
         <c ca="right">
            <p>6</p>
         </c>
         <c ca="right">
            <p>84</p>
         </c>
         <c ca="right">
            <p>46</p>
         </c>
         <c ca="right">
            <p>0</p>
         </c>
         <c ca="right">
            <p>40</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. bercovieri</it>
            </p>
         </c>
         <c ca="right">
            <p>9</p>
         </c>
         <c ca="right">
            <p>45</p>
         </c>
         <c ca="right">
            <p>6</p>
         </c>
         <c ca="right">
            <p>9</p>
         </c>
         <c ca="right">
            <p>13</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. frederiksenii</it>
            </p>
         </c>
         <c ca="right">
            <p>0</p>
         </c>
         <c ca="right">
            <p>57</p>
         </c>
         <c ca="right">
            <p>6</p>
         </c>
         <c ca="right">
            <p>0</p>
         </c>
         <c ca="right">
            <p>5</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. intermedia</it>
            </p>
         </c>
         <c ca="right">
            <p>2</p>
         </c>
         <c ca="right">
            <p>91</p>
         </c>
         <c ca="right">
            <p>48</p>
         </c>
         <c ca="right">
            <p>0</p>
         </c>
         <c ca="right">
            <p>43</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. kristensenii</it>
            </p>
         </c>
         <c ca="right">
            <p>2</p>
         </c>
         <c ca="right">
            <p>99</p>
         </c>
         <c ca="right">
            <p>70</p>
         </c>
         <c ca="right">
            <p>0</p>
         </c>
         <c ca="right">
            <p>59</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. mollaretii</it>
            </p>
         </c>
         <c ca="right">
            <p>6</p>
         </c>
         <c ca="right">
            <p>62</p>
         </c>
         <c ca="right">
            <p>26</p>
         </c>
         <c ca="right">
            <p>0</p>
         </c>
         <c ca="right">
            <p>20</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. rohdei</it>
            </p>
         </c>
         <c ca="right">
            <p>0</p>
         </c>
         <c ca="right">
            <p>37</p>
         </c>
         <c ca="right">
            <p>8</p>
         </c>
         <c ca="right">
            <p>0</p>
         </c>
         <c ca="right">
            <p>7</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. ruckeri</it>
            </p>
         </c>
         <c ca="right">
            <p>45</p>
         </c>
         <c ca="right">
            <p>2</p>
         </c>
         <c ca="right">
            <p>0</p>
         </c>
         <c ca="right">
            <p>0</p>
         </c>
         <c ca="right">
            <p>2</p>
         </c>
      </r>
   </tblbdy><tblfn>
      <p>Three of the repeat sequences found using <it>de novo </it>searches matched the known repeat elements ERIC, YPAL, and IS1541C and are identified as such. <it>Kristensenii</it>39 and Aldovae3 are elements found from <it>de novo </it>searches in the <it>Y. kristensenii </it>and <it>Y. aldovae </it>genomes, respectively.</p>
   </tblfn></tbl>
		<suppl id="S6">
            <title>
               <p>Additional file 6</p>
            </title>
            <caption>
               <p>Sequences of the detected repeat families</p>
            </caption>
            <text>
               <p>Sequences of the detected repeat families.</p>
            </text>
            <file name="gb-2010-11-1-r1-S6.txt">
   <p>Click here for file</p>
</file>
         </suppl>	
         </sec>
         <sec>
            <st>
               <p>New <it>Yersinia </it>genome data reduce the pool of unique detection targets for <it>Y. pestis </it>and <it>Y. enterocolitica</it></p>
            </st>
            <p>The sequences generated in this study provide new background information for validating genus detection and diagnosis assays targeting pathogenic members of the <it>Yersinia </it>genus. The assay design process commonly starts by computationally identifying genomic regions that are unique to the targeted genus ('signatures') - an ideal signature is shared by all targeted pathogens but not found in a background comprising non-pathogenic near neighbors or in other unrelated microbes. While many pathogens are well characterized at the genomic level, the background set is only sparsely represented in genomic databases, thereby limiting the ability to computationally screen out non-specific candidate assays (false positives). As a result, many assays may fail experimental field tests, thereby increasing the costs of assay development efforts. To evaluate whether the new genomic sequences generated in our study can reduce the incidence of false positives in assay development, we computed signatures for the <it>Y. pestis </it>and <it>Y. enterocolitica </it>genomes using the Insignia pipeline <abbrgrp><abbr bid="B44">44</abbr></abbrgrp>, the system previously used to successfully develop assays for the detection of <it>V. cholerae </it><abbrgrp><abbr bid="B44">44</abbr></abbrgrp>. We identified 171 and 100 regions within the genomes of <it>Y. pestis </it>and <it>Y. enterocolitica</it>, respectively, that represent good candidates for the design of detection assays. In <it>Y. pestis </it>these regions tended to cluster around the origin of replication, whereas in <it>Y. enterocolitica </it>there was a more even distribution. 
The average G+C content of the regions for the unique sequences in both species was close to the <it>Yersinia </it>average (47%) and there was not a strong association with putative genome islands (Additional files <supplr sid="S7">7</supplr>, <supplr sid="S8">8</supplr>, <supplr sid="S9">9</supplr>, <supplr sid="S10">10</supplr>, <supplr sid="S11">11</supplr>, <supplr sid="S12">12</supplr>, <abbrgrp><abbr bid="B45">45</abbr></abbrgrp>). For both species, most regions overlapped predicted genes (161 of 171 (94%) and 96 of 100 (96%) in <it>Y. pestis </it>and <it>Y. enterocolitica</it>, respectively). Interestingly, 171 <it>Y. pestis </it>gene regions were spread over only 70 different genes, whereas the 96 <it>Y. enterocolitica </it>regions were found overlapping only 90 genes. There was no obvious trend in the nature of the genes harboring these putative signals except that many could be arguably classed as 'non-core' functions, encoding phage endonucleases, invasins, hemolysins and hypothetical proteins.</p>
            <suppl id="S7">
            <title>
               <p>Additional file 7</p>
            </title>
            <caption>
               <p><it>Y. pestis </it>CO92 signatures longer than 100 bp computed by the Insignia pipeline</p>
            </caption>
            <text>
               <p><it>Y. pestis </it>CO92 signatures longer than 100 bp computed by the Insignia <abbrgrp><abbr bid="B44">44</abbr></abbrgrp> pipeline.</p>
            </text>
            <file name="gb-2010-11-1-r1-S7.txt">
   <p>Click here for file</p>
</file>
         </suppl>
         <suppl id="S8">
            <title>
               <p>Additional file 8</p>
            </title>
            <caption>
               <p>Sequences of the new genomes that match (that is, invalidate) the <it>Y. pestis </it>CO92 signatures listed in Additional file <supplr sid="S7">7</supplr></p>
            </caption>
            <text>
               <p>Sequences of the new genomes that match (that is, invalidate) the <it>Y. pestis </it>CO92 signatures listed in Additional file <supplr sid="S7">7</supplr>.</p>
            </text>
            <file name="gb-2010-11-1-r1-S8.txt">
   <p>Click here for file</p>
</file>
         </suppl>
         <suppl id="S9">
            <title>
               <p>Additional file 9</p>
            </title>
            <caption>
               <p><it>Y. enterocolitica </it>signatures longer than 100 bp computed by the Insignia pipeline</p>
            </caption>
            <text>
               <p><it>Y. enterocolitica </it>signatures longer than 100 bp computed by the Insignia pipeline.</p>
            </text>
            <file name="gb-2010-11-1-r1-S9.txt">
   <p>Click here for file</p>
</file>
         </suppl>
         <suppl id="S10">
            <title>
               <p>Additional file 10</p>
            </title>
            <caption>
               <p>Sequences of the new genomes that match (that is, invalidate) the <it>Y. enterocolitica </it>signatures</p>
            </caption>
            <text>
               <p>Sequences of the new genomes that match (that is, invalidate) the <it>Y. enterocolitica </it>signatures.</p>
            </text>
            <file name="gb-2010-11-1-r1-S10.txt">
   <p>Click here for file</p>
</file>
         </suppl>
         <suppl id="S11">
            <title>
               <p>Additional file 11</p>
            </title>
            <caption>
               <p><it>Y. pestis </it>genome with the Insignia-identified repeats and genome islands plotted</p>
            </caption>
            <text>
               <p><it>Y. pestis </it>genome with the Insignia-identified repeats and genome islands identified using IslandViewer <abbrgrp><abbr bid="B45">45</abbr></abbrgrp> plotted. The figure was created using DNAPlotter <abbrgrp><abbr bid="B106">106</abbr></abbrgrp>.</p>
            </text>
            <file name="gb-2010-11-1-r1-S11.png">
   <p>Click here for file</p>
</file>
         </suppl>
         <suppl id="S12">
            <title>
               <p>Additional file 12</p>
            </title>
            <caption>
               <p><it>Y. enterocolitica </it>genome with the Insignia-identified repeats and genome islands plotted</p>
            </caption>
            <text>
               <p><it>Y. enterocolitica </it>genome with the Insignia-identified repeats and genome islands identified using IslandViewer <abbrgrp><abbr bid="B45">45</abbr></abbrgrp> plotted. The figure was created using DNAPlotter <abbrgrp><abbr bid="B106">106</abbr></abbrgrp>.</p>
            </text>
            <file name="gb-2010-11-1-r1-S12.png">
   <p>Click here for file</p>
</file>
         </suppl>			
			<p>Ten <it>Y. pestis</it>-specific and 31 <it>Y. enterocolitica</it>-specific putative signatures have significant matches in the new genome sequence data (Additional files <supplr sid="S7">7</supplr>, <supplr sid="S8">8</supplr>, <supplr sid="S9">9</supplr>, <supplr sid="S10">10</supplr>), indicating assays designed within these regions would result in false positive results. This result underscores the need for a further sampling of genomes of the <it>Yersinia </it>genus in order to assist the design of diagnostic assays.</p>
         </sec>
         <sec>
            <st>
               <p><it>Yersinia </it>whole-genome comparisons</p>
            </st>
            <p>We performed a multiple alignment of the 11 <it>Yersinia </it>species using the MAUVE algorithm <abbrgrp><abbr bid="B46">46</abbr></abbrgrp> (from here on <it>Y. pestis </it>CO92 and <it>Y. pseudotuberculosis </it>IP32953 were used as the representative genomes of their species) and obtained 98 locally collinear blocks (LCBs; Additional files <supplr sid="S13">13</supplr>, <supplr sid="S14">14</supplr>, <abbrgrp><abbr bid="B47">47</abbr></abbrgrp>). The mean length of the LCBs was 23,891 bp. The shortest block was 1,570 bp, and the longest was 201,130 bp. This multiple alignment of the 'core' region on average covered 52% of each <it>Yersinia </it>genome. The nucleotide diversity (&#928;) for the concatenated aligned region was 0.27, or an approximate genus-wide nucleotide sequence homology of 73%. As expected for a set of bacteria with this level of diversity, the alignment of the genomes shows evidence of multiple large genome rearrangements <abbrgrp><abbr bid="B23">23</abbr></abbrgrp> (Additional file <supplr sid="S13">13</supplr>).</p>
		<suppl id="S13">
            <title>
               <p>Additional file 13</p>
            </title>
            <caption>
               <p>Output of the MAUVE <abbrgrp><abbr bid="B46">46</abbr></abbrgrp> alignment of 11 <it>Yersinia </it>species</p>
            </caption>
            <text>
               <p>The eight genomes sequenced in this study are represented as pseudocontigs, ordered by a combination of optical mapping and alignment to the closest completed reference genome.</p>
            </text>
            <file name="gb-2010-11-1-r1-S13.jpeg">
   <p>Click here for file</p>
</file>
         </suppl>
		 <suppl id="S14">
            <title>
               <p>Additional file 14</p>
            </title>
            <caption>
               <p>Whole genome multiple alignment produced by MAUVE of the 11 <it>Yersinia </it>genomes</p>
            </caption>
            <text>
               <p>Whole genome multiple alignment produced by MAUVE of the 11 <it>Yersinia </it>genomes in XMFA format <abbrgrp><abbr bid="B106">106</abbr></abbrgrp>.</p>
            </text>
            <file name="gb-2010-11-1-r1-S14.zip">
   <p>Click here for file</p>
</file>
         </suppl>
            <p>Using an automated pipeline for annotation and clustering of protein orthologs based on the Markov chain clustering tool MCL <abbrgrp><abbr bid="B48">48</abbr></abbrgrp>, we estimated the size of the <it>Yersinia </it>protein core set to be 2,497 and the pan-genome <abbrgrp><abbr bid="B49">49</abbr></abbrgrp> to be 27,470 (Additional files <supplr sid="S15">15</supplr>, <supplr sid="S16">16</supplr>, <supplr sid="S17">17</supplr>, <supplr sid="S18">18</supplr>). The core number falls asymptotically as genomes are introduced and hence this estimate is somewhat lower than the recent analysis of only the <it>Y. enterocolitica</it>, <it>Y. pseudotuberculosis </it>and <it>Y. pestis </it>genomes (2,747 core proteins) <abbrgrp><abbr bid="B15">15</abbr></abbrgrp>. We found 681 genes to be in exactly one copy in each <it>Yersinia </it>genome and to be nearly identical in length. We used ClustalW <abbrgrp><abbr bid="B50">50</abbr></abbrgrp> to align the members of this highly conserved set, and concatenated individual gene product alignments to make a dataset of 170,940 amino acids for each of the species. Uninformative characters were removed from the dataset and a phylogeny of the genus was computed using Phylip <abbrgrp><abbr bid="B51">51</abbr></abbrgrp> (Figure <figr fid="F1">1</figr>). The topology of this tree was identical whether distance or parsimony methods were used (Additional files <supplr sid="S19">19</supplr>, <supplr sid="S20">20</supplr>) and was also identical to a tree based on the nucleotide sequence of the approximately 1.5 Mb of the core genome in LCBs (see above). The genus broke down into three major clades: the outlying fish pathogen, <it>Y. ruckeri</it>; <it>Y. pestis</it>/<it>Y. pseudotuberculosis</it>; and the remainder of the 'enterocolitica'-like species. <it>Y. kristensenii </it>ATCC33638T was the nearest neighbor of <it>Y. enterocolitica </it>8081. The outlying position of <it>Y. 
ruckeri </it>was confirmed further when we analyzed the contribution of the genome to reducing the size of the <it>Yersinia </it>core protein families set. If <it>Y. ruckeri </it>was excluded, the <it>Yersinia </it>core would be 2,232 protein families of N &#8805; 2 rather than 2,072 (Table <tblr tid="T4">4</tblr>). In contrast, omission of any one of the 10 other species only reduced the set by a maximum of 22 families.</p>
			<tbl id="T4"><title><p>Table 4</p></title><caption><p><it>Yersinia </it>core size reduction by exclusion of one species</p></caption><tblbdy cols="2">
      <r>
         <c ca="left">
            <p>
               <b>Species excluded</b>
            </p>
         </c>
         <c ca="center">
            <p>
               <b>Core protein families</b>
            </p>
         </c>
      </r>
      <r>
         <c cspan="2">
            <hr/>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>None</p>
         </c>
         <c ca="center">
            <p>2,072</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. enterocolitica</it>
            </p>
         </c>
         <c ca="center">
            <p>2,074</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. aldovae</it>
            </p>
         </c>
         <c ca="center">
            <p>2,085</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. bercovieri</it>
            </p>
         </c>
         <c ca="center">
            <p>2,079</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. frederiksenii</it>
            </p>
         </c>
         <c ca="center">
            <p>2,077</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. intermedia</it>
            </p>
         </c>
         <c ca="center">
            <p>2,080</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. kristensenii</it>
            </p>
         </c>
         <c ca="center">
            <p>2,076</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. mollaretii</it>
            </p>
         </c>
         <c ca="center">
            <p>2,078</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. rohdei</it>
            </p>
         </c>
         <c ca="center">
            <p>2,091</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. ruckeri</it>
            </p>
         </c>
         <c ca="center">
            <p>2,232</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. pseudotuberculosis</it>
            </p>
         </c>
         <c ca="center">
            <p>2,076</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. pestis</it>
            </p>
         </c>
         <c ca="center">
            <p>2,094</p>
         </c>
      </r>
   </tblbdy><tblfn>
      <p>The core protein families with number of members 2 or greater were recalculated in each case (see Materials and methods) with the protein set from one genome missing.</p>
   </tblfn></tbl>
            <fig id="F1"><title><p>Figure 1</p></title><caption><p><it>Yersinia </it>whole-genome phylogeny</p></caption><text>
   <p><b><it>Yersinia </it>whole-genome phylogeny</b>. The phylogeny of the <it>Yersinia </it>genus was constructed from a dataset of 681 concatenated, conserved protein sequences using the Neighbor-Joining (NJ) algorithm implemented by PHYLIP <abbrgrp><abbr bid="B51">51</abbr></abbrgrp>. The tree was rooted using <it>E. coli</it>. The scale measures number of substitutions per residue. Tree topologies computed using maximum likelihood and parsimony estimates are identical with each other and the NJ tree (Additional file <supplr sid="S20">20</supplr>). The only branches not supported in more than 99% of the 1,000 bootstrap replicates using both methods are marked with asterisks. Both these branches were supported by >57% of replicates.</p>
</text><graphic file="gb-2010-11-1-r1-1"/></fig>
			<suppl id="S15">
            <title>
               <p>Additional file 15</p>
            </title>
            <caption>
               <p>Output of the cluster analysis of the 11 <it>Yersinia </it>species</p>
            </caption>
            <text>
               <p>The top level directory consists of a directory called Additional_cluster_files and 5010 directories, one for each multi-protein cluster family. (This top level directory has been split into three data files for uploading purposes (Additional files <supplr sid="S15">15</supplr>, <supplr sid="S16">16</supplr>, <supplr sid="S17">17</supplr>).) Within the directory are the following files: PGL1_unique_<it>Yersinia</it>_unclustered.out - list of all protein singletons that MCL did not group into a cluster (see Materials and Methods); PGL1_<it>Yersinia</it>_unique_locus_tags.txt - names of the 11 locus tag prefixes used for each genome; PGL1_unique_<it>Yersinia</it>.gff - mapping each <it>Yersinia </it>protein to a cluster in tab delimited GFF; PGL1_unique_<it>Yersinia</it>.sigfile - list of the longest protein in each cluster; PGL1_unique_<it>Yersinia</it>.summary - summary table of features of each of the clusters; PGL1_unique_<it>Yersinia</it>.table - summary table of each protein in the clusters. Within each cluster directory are the following files, where 'x' is the cluster name: PGL1_unique_<it>Yersinia</it>-x.faa - multifasta file of the proteins in the cluster; PGL1_unique_<it>Yersinia</it>-x.summary - summary of the properties of the proteins; PGL1_unique_<it>Yersinia</it>-x.matches - blast matches between the proteins of the cluster; PGL1_unique_<it>Yersinia</it>-x.muscle.fasta - muscle alignment of the proteins; PGL1_unique_<it>Yersinia</it>-x.muscle.fasta.gblo - gblocks output of muscle alignment (that is, auto-trimmed alignment); PGL1_unique_<it>Yersinia</it>-x.muscle.fasta.gblo.htm - as above in html format; PGL1_unique_<it>Yersinia</it>-x.muscle.tree - treefile from muscle alignment; PGL1_unique_<it>Yersinia</it>-x.sif - matches between proteins in simple interaction format for display on graphing software.</p>
            </text>
            <file name="gb-2010-11-1-r1-S15.zip">
   <p>Click here for file</p>
</file>
         </suppl>
         <suppl id="S16">
            <title>
               <p>Additional file 16</p>
            </title>
            <caption>
               <p>Output of the cluster analysis of the 11 <it>Yersinia </it>species</p>
            </caption>
            <text>
               <p>The top level directory consists of a directory called Additional_cluster_files and 5010 directories, one for each multi-protein cluster family. (This top level directory has been split into three data files for uploading purposes (Additional files <supplr sid="S15">15</supplr>, <supplr sid="S16">16</supplr>, <supplr sid="S17">17</supplr>).) Within the directory are the following files: PGL1_unique_<it>Yersinia</it>_unclustered.out - list of all protein singletons that MCL did not group into a cluster (see Materials and Methods); PGL1_<it>Yersinia</it>_unique_locus_tags.txt - names of the 11 locus tag prefixes used for each genome; PGL1_unique_<it>Yersinia</it>.gff - mapping each <it>Yersinia </it>protein to a cluster in tab delimited GFF; PGL1_unique_<it>Yersinia</it>.sigfile - list of the longest protein in each cluster; PGL1_unique_<it>Yersinia</it>.summary - summary table of features of each of the clusters; PGL1_unique_<it>Yersinia</it>.table - summary table of each protein in the clusters. Within each cluster directory are the following files, where 'x' is the cluster name: PGL1_unique_<it>Yersinia</it>-x.faa - multifasta file of the proteins in the cluster; PGL1_unique_<it>Yersinia</it>-x.summary - summary of the properties of the proteins; PGL1_unique_<it>Yersinia</it>-x.matches - blast matches between the proteins of the cluster; PGL1_unique_<it>Yersinia</it>-x.muscle.fasta - muscle alignment of the proteins; PGL1_unique_<it>Yersinia</it>-x.muscle.fasta.gblo - gblocks output of muscle alignment (that is, auto-trimmed alignment); PGL1_unique_<it>Yersinia</it>-x.muscle.fasta.gblo.htm - as above in html format; PGL1_unique_<it>Yersinia</it>-x.muscle.tree - treefile from muscle alignment; PGL1_unique_<it>Yersinia</it>-x.sif - matches between proteins in simple interaction format for display on graphing software.</p>
            </text>
            <file name="gb-2010-11-1-r1-S16.zip">
   <p>Click here for file</p>
</file>
         </suppl>
         <suppl id="S17">
            <title>
               <p>Additional file 17</p>
            </title>
            <caption>
               <p>Output of the cluster analysis of the 11 <it>Yersinia </it>species</p>
            </caption>
            <text>
               <p>The top level directory consists of a directory called Additional_cluster_files and 5010 directories, one for each multi-protein cluster family. (This top level directory has been split into three data files for uploading purposes (Additional files <supplr sid="S15">15</supplr>, <supplr sid="S16">16</supplr>, <supplr sid="S17">17</supplr>).) Within the directory are the following files: PGL1_unique_<it>Yersinia</it>_unclustered.out - list of all protein singletons that MCL did not group into a cluster (see Materials and Methods); PGL1_<it>Yersinia</it>_unique_locus_tags.txt - names of the 11 locus tag prefixes used for each genome; PGL1_unique_<it>Yersinia</it>.gff - mapping each <it>Yersinia </it>protein to a cluster in tab delimited GFF; PGL1_unique_<it>Yersinia</it>.sigfile - list of the longest protein in each cluster; PGL1_unique_<it>Yersinia</it>.summary - summary table of features of each of the clusters; PGL1_unique_<it>Yersinia</it>.table - summary table of each protein in the clusters. Within each cluster directory are the following files, where 'x' is the cluster name: PGL1_unique_<it>Yersinia</it>-x.faa - multifasta file of the proteins in the cluster; PGL1_unique_<it>Yersinia</it>-x.summary - summary of the properties of the proteins; PGL1_unique_<it>Yersinia</it>-x.matches - blast matches between the proteins of the cluster; PGL1_unique_<it>Yersinia</it>-x.muscle.fasta - muscle alignment of the proteins; PGL1_unique_<it>Yersinia</it>-x.muscle.fasta.gblo - gblocks output of muscle alignment (that is, auto-trimmed alignment); PGL1_unique_<it>Yersinia</it>-x.muscle.fasta.gblo.htm - as above in html format; PGL1_unique_<it>Yersinia</it>-x.muscle.tree - treefile from muscle alignment; PGL1_unique_<it>Yersinia</it>-x.sif - matches between proteins in simple interaction format for display on graphing software.</p>
            </text>
            <file name="gb-2010-11-1-r1-S17.zip">
   <p>Click here for file</p>
</file>
         </suppl>
         <suppl id="S18">
            <title>
               <p>Additional file 18</p>
            </title>
            <caption>
               <p>Complete protein sets for the 11 species of <it>Yersinia</it></p>
            </caption>
            <text>
               <p>Complete protein sets for the 11 species of <it>Yersinia</it>.</p>
            </text>
            <file name="gb-2010-11-1-r1-S18.zip">
   <p>Click here for file</p>
</file>
         </suppl>
         <suppl id="S19">
            <title>
               <p>Additional file 19</p>
            </title>
            <caption>
               <p>Inferred evolutionary trees reconstructed using PHYLIP <abbrgrp><abbr bid="B51">51</abbr></abbrgrp> of the 11 <it>Yersinia </it>species proteomes based on parsimony</p>
            </caption>
            <text>
               <p>To evaluate node support, a majority rule-consensus tree of 1,000 bootstrap replicates was computed. <it>E. coli </it>was used as an outgroup species.</p>
            </text>
            <file name="gb-2010-11-1-r1-S19.pdf">
   <p>Click here for file</p>
</file>
         </suppl>
         <suppl id="S20">
            <title>
               <p>Additional file 20</p>
            </title>
            <caption>
               <p>Inferred evolutionary trees reconstructed using PHYLIP <abbrgrp><abbr bid="B51">51</abbr></abbrgrp> of the 11 <it>Yersinia </it>species proteomes based on maximum likelihood</p>
            </caption>
            <text>
               <p>To evaluate node support, a majority rule-consensus tree of 1,000 bootstrap replicates was computed. <it>E. coli </it>was used as an outgroup species.</p>
            </text>
            <file name="gb-2010-11-1-r1-S20.pdf">
   <p>Click here for file</p>
</file>
         </suppl>
            <p>Clustering the significant Cluster of Orthologous Groups (COG) hits <abbrgrp><abbr bid="B52">52</abbr></abbrgrp> for each genome hierarchically (Figure <figr fid="F2">2</figr>) yielded a similar pattern for the three basic clades. The overall composition of the COG matches in each genome, as measured by the proportion of the numbers in each COG supercategory, was similar throughout the genus, with the notable exceptions of the high percentage of group L COGs in <it>Y. pestis </it>due to the expansion of IS recombinases and the relatively low number of group G (sugar metabolism) COGs in <it>Y. ruckeri </it>(Figure <figr fid="F2">2</figr>).</p>
            <fig id="F2"><title><p>Figure 2</p></title><caption><p>Comparison of major COG groups in <it>Yersinia </it>genomes</p></caption><text>
   <p><b>Comparison of major COG groups in <it>Yersinia </it>genomes</b>. Bars represent the number of proteins assigned to COG superfamilies <abbrgrp><abbr bid="B52">52</abbr></abbrgrp> for each genome, based on matches to the Conserved Domain Database <abbrgrp><abbr bid="B95">95</abbr></abbrgrp> with an E-value threshold &lt;10<sup>-10</sup>. The COG groups are: U, intracellular trafficking; G, carbohydrate transport and metabolism; R, general function prediction; I, lipid transport and metabolism; D, cell cycle control; H, coenzyme transport and metabolism; B, chromatin structure; P, inorganic ion transport and metabolism; W, extracellular structures; O, post-translational modification; J, translation; A, RNA processing and editing; L, replication, recombination and repair; C, energy production; M, cell wall/membrane biogenesis; Q, secondary metabolite biosynthesis; Z, cytoskeleton; V, defense mechanisms; E, amino acid transport and metabolism; K, transcription; N, cell motility; T, signal transduction; F, nucleotide transport; S, function unknown.</p>
</text><graphic file="gb-2010-11-1-r1-2"/></fig>
         </sec>
         <sec>
            <st>
               <p>Shared protein clusters in pathogenic <it>Yersinia</it>: yersiniabactin biosynthesis is the key chromosomal function specific to high virulence in humans</p>
            </st>
            <p>The <it>Yersinia </it>proteomes were investigated for common clusters in the three high virulence species missing from the low human virulence genomes (Figure <figr fid="F3">3</figr>). Because of the close evolutionary relationship of the 'enterocolitica' clade strains, the number of unique protein clusters in <it>Y. enterocolitica </it>was reduced to a greater degree than the more phylogenetically isolated <it>Y. pestis </it>and <it>Y. pseudotuberculosis</it>. Many of the same genome islands identified as recent horizontal acquisition by <it>Y. pestis </it>and/or <it>Y. pseudotuberculosis </it><abbrgrp><abbr bid="B9">9</abbr><abbr bid="B13">13</abbr><abbr bid="B15">15</abbr></abbrgrp> were not present in any of the newly sequenced genomes. However, some genes, interesting from the perspective of the host specificity of the <it>Y. pestis</it>/<it>Y. pseudotuberculosis </it>ancestor, were detected in other <it>Yersinia </it>species for the first time. These included orthologs of YPO3720/YPO3721, a hemolysin and activator protein in <it>Y. intermedia</it>, <it>Y. bercovieri </it>and <it>Y. frederiksenii</it>; YPO0599, a heme utilization protein also found in <it>Y. intermedia</it>; and YPO0399, an enhancin metalloprotease that had an ortholog in <it>Y. kristensenii </it>(ykris0001_41250). Enhancin was originally identified as a factor promoting baculovirus infection of gypsy moth midgut by degradation of mucin <abbrgrp><abbr bid="B53">53</abbr></abbrgrp>. Other loci in <it>Y. pestis</it>/<it>Y. pseudotuberculosis </it>linked with insect infection, the TccC and TcABC toxin clusters <abbrgrp><abbr bid="B54">54</abbr></abbrgrp>, were also found in <it>Y. mollaretii</it>. In <it>Y. mollaretii </it>the Tca and Tcc proteins show about 90% sequence identity to <it>Y. pestis</it>/<it>Y. pseudotuberculosis </it>and share identical flanking chromosomal locations. Further work will need to be undertaken to resolve whether the insertion of the toxin genes in <it>Y. 
mollaretii </it>is an independent horizontal transfer event or occurred prior to divergence of the species.</p>
            <fig id="F3"><title><p>Figure 3</p></title><caption><p>Distribution of protein clusters across <it>Y. enterocolitica </it>8081, <it>Y. pestis </it>CO92, and <it>Y. pseudotuberculosis </it>IP32953</p></caption><text>
   <p><b>Distribution of protein clusters across <it>Y. enterocolitica </it>8081, <it>Y. pestis </it>CO92, and <it>Y. pseudotuberculosis </it>IP32953</b>. <b>(a) </b>The Venn diagram shows the number of protein clusters unique or shared between the two other high virulence <it>Yersinia </it>species (see Materials and methods). <b>(b) </b>The number of shared and unique clusters that do not contain a single member of the eight low human virulence genomes sequenced in this study.</p>
</text><graphic file="gb-2010-11-1-r1-3"/></fig>
            <p>After comparison of the new low virulence genomes, the number of protein clusters shared by <it>Y. enterocolitica </it>and the other two pathogens was reduced to 12 and 13 for <it>Y. pseudotuberculosis </it>and <it>Y. pestis</it>, respectively (Figure <figr fid="F3">3</figr>). The remaining shared proteins were either identified as phage-related or of unknown role, providing few clues to possible functions that might define distinct pathogenic niches. Performing a similar analysis strategy between other genomes of the 'enterocolitica' clade and <it>Y. pestis </it>or <it>Y. pseudotuberculosis </it>gave a similar result in terms of numbers and types of shared protein clusters.</p>
            <p>Only sixteen clusters of chromosomal proteins were found to be common to all three high-virulence species but absent from all eight non-pathogens (Figure <figr fid="F3">3</figr>). Eleven of these are components of the yersiniabactin biosynthesis operon (Additional file <supplr sid="S21">21</supplr>), further highlighting the critical importance of this iron binding siderophore for invasive disease. The other proteins are generally small proteins that are likely included because they fall in unassembled regions of the eight draft genomes. One other small island of three proteins constituting a multi-drug efflux pump (YE0443 to YE0445) was common to the high-virulence species but missing from the eight draft low-virulence species.</p>
		<suppl id="S21">
            <title>
               <p>Additional file 21</p>
            </title>
            <caption>
               <p>Twenty proteins conserved in pathogenic strains but missing from the non-pathogen set</p>
            </caption>
            <text>
               <p>A curve showing the rate of decline in number of this set as more non-pathogen genomes are added is also included.</p>
            </text>
            <file name="gb-2010-11-1-r1-S21.doc">
   <p>Click here for file</p>
</file>
         </suppl>	
         </sec>
         <sec>
            <st>
               <p>Variable regions of <it>Y. enterocolitica </it>clade genomes</p>
            </st>
            <p>The basic metabolic similarities of <it>Y. enterocolitica </it>and the seven species on the main branch of the <it>Yersinia </it>genus phylogenetic tree are further illustrated in Figure <figr fid="F4">4</figr>, where the best protein matches against each <it>Y. enterocolitica </it>8081 gene product <abbrgrp><abbr bid="B15">15</abbr></abbrgrp> are plotted against a circular genome map. Very few genes exclusive to <it>Y. enterocolitica </it>8081 were found outside of prophage regions, which is a typical result when groups of closely related bacterial genomes are compared <abbrgrp><abbr bid="B55">55</abbr></abbrgrp>. One of the largest islands found in <it>Y. enterocolitica </it>8081 was the 66-kb <it>Y. pseudotuberculosis </it>adhesion pathogenicity island (YAPI<sub>ye</sub>) <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B56">56</abbr><abbr bid="B57">57</abbr></abbrgrp>, a unique feature of biotype 1B strains. YAPI<sub>ye</sub>, containing a type IV pilus gene cluster and other putative virulence determinants, such as arsenic resistance, is similar to a 99-kb YAPI<sub>pst </sub>that is found in several other serotypes of <it>Y. pseudotuberculosis </it><abbrgrp><abbr bid="B14">14</abbr><abbr bid="B57">57</abbr></abbrgrp> but is missing in <it>Y. pestis </it>and the serotype I <it>Y. pseudotuberculosis </it>strain IP32953 <abbrgrp><abbr bid="B14">14</abbr></abbrgrp>. A model has been proposed for the acquisition of YAPI in a common ancestor of <it>Y. pseudotuberculosis </it>and <it>Y. enterocolitica </it>and subsequent degradation to various degrees within the <it>Y. pseudotuberculosis </it>clade. However, the complete absence of YAPI from any of the seven species in the <it>Y. enterocolitica </it>branch (Figure <figr fid="F4">4</figr>), as well as from most strains of <it>Y. 
enterocolitica </it><abbrgrp><abbr bid="B15">15</abbr></abbrgrp>, argues against an ancient acquisition of YAPI, but instead suggests the recent independent acquisition of related islands by both <it>Y. enterocolitica </it>biogroup 1B and <it>Y. pseudotuberculosis</it>.</p>
            <fig id="F4"><title><p>Figure 4</p></title><caption><p>Protein-based comparison of <it>Y. enterocolitica </it>8081 to the <it>Yersinia </it>genus</p></caption><text>
   <p><b>Protein-based comparison of <it>Y. enterocolitica </it>8081 to the <it>Yersinia </it>genus</b>. The map represents the blast score ratio (BSR) <abbrgrp><abbr bid="B98">98</abbr><abbr bid="B99">99</abbr></abbrgrp> to the protein encoded by <it>Y. enterocolitica </it><abbrgrp><abbr bid="B15">15</abbr></abbrgrp>. Blue indicates a BSR >0.70 (strong match); cyan 0.69 to 0.4 (intermediate); green &lt;0.4 (weak). Red and pink outer circles are locations of the <it>Y. enterocolitica </it>genes on the + and - strands. The genomes are ordered from outside to inside based on the greatest overall similarity to <it>Y. enterocolitica</it>: <it>Y. kristensenii</it>, <it>Y. frederiksenii</it>, <it>Y. mollaretii</it>, <it>Y. intermedia</it>, <it>Y. bercovieri, Y. aldovae</it>, <it>Y. rohdei</it>, <it>Y. ruckeri</it>, <it>Y. pseudotuberculosis</it>, and <it>Y. pestis</it>. The black bars on the outside refer to genome islands in <it>Y. enterocolitica </it>identified by Thomson <it>et al</it>. <abbrgrp><abbr bid="B15">15</abbr></abbrgrp>.</p>
</text><graphic file="gb-2010-11-1-r1-4"/></fig>
            <p>Many genes previously thought to be unique to <it>Y. enterocolitica</it> in general and biotype 1B in particular turned out to have orthologs in the low human virulence species sequenced in this study. These included several putative biotype 1B-specific genes identified by microarray-based screening <abbrgrp><abbr bid="B58">58</abbr></abbrgrp>, including YE0344 HylD hemophore (yinte0001_41550 has 78% nucleotide identity), YE4052 metalloprotease (yinte0001_36030 has 95% nucleotide identity), and YE4088, a two-component sensor kinase, which had orthologs in all species. Large portions of the biogroup 1B-specific island containing the Yts1 type II secretion system were found in <it>Y. ruckeri</it>, <it>Y. mollaretii</it>, and <it>Y. aldovae</it>. <it>Y. aldovae</it> and <it>Y. mollaretii</it> also had islands containing <it>ysa</it> type three secretion systems (TTSS) with 75 to 85% nucleotide identity to the homolog in <it>Y. enterocolitica</it> 1B. The <it>ysa </it>genes are a chromosomal cluster <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B13">13</abbr><abbr bid="B15">15</abbr></abbrgrp> that in <it>Y. enterocolitica</it>, at least, appears to play a role in virulence <abbrgrp><abbr bid="B59">59</abbr></abbrgrp>. The <it>Y. enterocolitica ysa </it>genes are found in the plasticity zone (Figure <figr fid="F4">4</figr>) and have very low similarity to the <it>Y. pestis </it>and <it>Y. pseudotuberculosis ysa </it>genes (which are more similar to the <it>Salmonella </it>SPI-2 island <abbrgrp><abbr bid="B60">60</abbr><abbr bid="B61">61</abbr></abbrgrp>) and are found between orthologs of YPO0254 and YPO0274 <abbrgrp><abbr bid="B9">9</abbr></abbrgrp>. Species within the <it>Yersinia </it>genus had either the <it>Y. enterocolitica </it>type of <it>ysa </it>TTSS locus or the <it>Y. pestis</it>/SPI-2 type (with the exception of <it>Y. aldovae</it>, which has both; Additional file <supplr sid="S22">22</supplr>). 
This suggested the exchange of chromosomal TTSS genes within <it>Yersinia</it>.</p>
			<suppl id="S22">
            <title>
               <p>Additional file 22</p>
            </title>
            <caption>
               <p>Phylogeny of TTSS component YscN in <it>Yersinia </it>and other enterobacteria species</p>
            </caption>
            <text>
               <p>Phylogeny of TTSS component YscN in <it>Yersinia </it>and other enterobacteria species.</p>
            </text>
            <file name="gb-2010-11-1-r1-S22.doc">
   <p>Click here for file</p>
</file>
         </suppl>
            <p>The modular nature of the islands found in the <it>Y. enterocolitica </it>genome was demonstrated further by two examples gleaned from comparison with the evolutionarily closest low human virulence genome, <it>Y. kristensenii </it>ATCC 33638T (Figure <figr fid="F1">1</figr>). The YGI-3 island <abbrgrp><abbr bid="B15">15</abbr></abbrgrp> in <it>Y. enterocolitica </it>8081 is a degraded integrated plasmid; at the same chromosomal locus in <it>Y. kristensenii </it>ATCC 33638T a prophage was found, suggesting that the YGI-3 location may be a recombinational hotspot. Another <it>Y. enterocolitica </it>8081 island, YGI-1, encodes a 'tight adherence' (<it>tad</it>) locus responsible for non-specific surface binding. <it>Y. kristensenii </it>ATCC 33638T had an identical 13 gene <it>tad </it>locus in the same position, but the nucleotide sequence identity of the region to <it>Y. enterocolitica </it>8081 was uniformly lower than that found for the rest of the genome, suggesting there had been either a gene conversion event replacing the <it>tad </it>locus with a set of new alleles in the recent history of <it>Y. kristensenii </it>or <it>Y. enterocolitica </it>or the locus was under very high positive selective pressure.</p>
         </sec>
         <sec>
            <st>
               <p>Niche-specific metabolic adaptations in the <it>Yersinia </it>genus</p>
            </st>
            <p>Comparison of the <it>Y. enterocolitica </it>genome to <it>Y. pestis </it>and <it>Y. pseudotuberculosis </it>revealed some potentially significant metabolic differences that may account for varying tropisms in gastric infections <abbrgrp><abbr bid="B62">62</abbr></abbrgrp>. <it>Y. enterocolitica </it>8081 alone contained entire gene clusters for cobalamin (vitamin B12) biosynthesis (<it>cbi</it>), 1,2-propanediol utilization (<it>pdu</it>), and tetrathionate respiration (<it>ttr</it>). In <it>Y. enterocolitica </it>and <it>Salmonella typhimurium </it><abbrgrp><abbr bid="B63">63</abbr><abbr bid="B64">64</abbr></abbrgrp>, vitamin B12 is produced under anaerobic conditions where it is used as a cofactor in 1,2-propanediol degradation, with tetrathionate serving as an electron acceptor. This study showed the genes for this pathway to be a general feature of species in the 'enterocolitica' branch of the <it>Yersinia </it>genus (with the caveat that some portions are missing in some species; for example, <it>Y. rohdei </it>is missing the <it>pdu </it>cluster (Table <tblr tid="T5">5</tblr>)). Additionally, <it>Y. intermedia</it>, <it>Y. bercovieri</it>, and <it>Y. mollaretii </it>contained gene clusters encoding degradation of the membrane lipid constituent ethanolamine. Ethanolamine metabolism under anaerobic conditions also requires the B12 cofactor. <it>Y. intermedia </it>contained the full 17-gene cluster reported in <it>S. typhimurium </it><abbrgrp><abbr bid="B65">65</abbr></abbrgrp>, including structural components of the carboxysome organelle. Another discovery from the <it>Y. enterocolitica </it>genome analysis was the presence of two compact hydrogenase gene clusters, Hyd-2 and Hyd-4 <abbrgrp><abbr bid="B15">15</abbr></abbrgrp>. Hydrogen released from fermentation by intestinal microflora is imputed to be an important energy source for enteric gut pathogens <abbrgrp><abbr bid="B66">66</abbr></abbrgrp>. 
Both gene clusters are conserved across all the other seven enterocolitica-branch species, but are missing from <it>Y. pestis </it>and <it>Y. pseudotuberculosis</it>. <it>Y. ruckeri </it>contained a single [NiFe]-containing hydrogenase complex.</p>
            <tbl id="T5"><title><p>Table 5</p></title><caption><p>Key niche-specific genes in <it>Yersinia</it></p></caption><tblbdy cols="10">
      <r>
         <c>
            <p/>
         </c>
         <c ca="left">
            <p>
               <b>
                  <it>cbi</it>
               </b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>
                  <it>pdu</it>
               </b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>
                  <it>ttr</it>
               </b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>
                  <it>eut</it>
               </b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>
                  <it>hyd-2</it>
               </b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>
                  <it>hyd-4</it>
               </b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>
                  <it>ure</it>
               </b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>
                  <it>mtn</it>
               </b>
            </p>
         </c>
         <c ca="left">
            <p>
               <b>
                  <it>opg</it>
               </b>
            </p>
         </c>
      </r>
      <r>
         <c cspan="10">
            <hr/>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. enterocolitica</it>
            </p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. aldovae</it>
            </p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. bercovieri</it>
            </p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>
               <it>eutABC</it>
            </p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. frederiksenii</it>
            </p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. intermedia</it>
            </p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>
               <it>eutSPQTDMNEJGHABCLKR</it>
            </p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. kristensenii</it>
            </p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. mollaretii</it>
            </p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>
               <it>eutABC</it>
            </p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. rohdei</it>
            </p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. ruckeri</it>
            </p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>+/- hyfABCGHINfdhF</p>
         </c>
         <c ca="left">
            <p>+/- (hyaD, hypEDB)</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. pseudotuberculosis</it>
            </p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
         <c ca="left">
            <p>+</p>
         </c>
      </r>
      <r>
         <c ca="left">
            <p>
               <it>Y. pestis</it>
            </p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>+/-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
         <c ca="left">
            <p>-</p>
         </c>
      </r>
   </tblbdy><tblfn>
      <p>Abbreviations: <it>cbi</it>, cobalamin (vitamin B12) biosynthesis; <it>pdu</it>, 1,2-propanediol utilization; <it>ttr</it>, tetrathionate respiration; <it>eut</it>, ethanolamine degradation; <it>hyd-2 </it>and <it>hyd-4</it>, hydrogenases 2 and 4, respectively; <it>ure</it>, urease; <it>mtn</it>, methionine salvage pathway; <it>opg</it>, osmoprotectant (synthesis of periplasmic branched glucans).</p>
   </tblfn></tbl>
            <p><it>Y. ruckeri</it>, the most evolutionarily distant member of the genus (Figure <figr fid="F1">1</figr>) with the smallest genome (3.7 Mb), had several features that were distinct from its congeners. The <it>Y. ruckeri </it>O-antigen operon contained a <it>neuB </it>sialic acid synthase gene; therefore, the bacterium was predicted to produce a sialylated outer surface structure. Among the common <it>Yersinia </it>genes that are missing only in <it>Y. ruckeri </it>were those for xylose utilization and urease activity, consistent with phenotypes that have long been known in clinical microbiology <abbrgrp><abbr bid="B67">67</abbr></abbrgrp> (Table <tblr tid="T3">3</tblr>). Surprisingly, we discovered that <it>Y. ruckeri </it>was also missing the <it>mtnKADCBEU </it>gene cluster that comprises the majority of the methionine salvage pathway <abbrgrp><abbr bid="B68">68</abbr></abbrgrp> found in most other Yersiniae. These genes have also been deleted from <it>Y. pestis</it>, but as with <it>Y. ruckeri</it>, the <it>mtnN </it>(methylthioadenosine nucleosidase) is maintained. The loss of these genes in <it>Y. pestis </it>has been interpreted as a consequence of adaptation to an obligate host-dwelling lifecycle, where the availability of the sulfur-containing amino acids is not a nutritional limitation <abbrgrp><abbr bid="B15">15</abbr></abbrgrp>.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Discussion</p>
         </st>
         <p>Whole-genome shotgun sequencing by high-throughput bead-based pyrosequencing has proved remarkably useful for the large-scale sequencing of closely related bacteria <abbrgrp><abbr bid="B49">49</abbr><abbr bid="B69">69</abbr><abbr bid="B70">70</abbr><abbr bid="B71">71</abbr><abbr bid="B72">72</abbr><abbr bid="B73">73</abbr><abbr bid="B74">74</abbr></abbrgrp>. High-quality <it>de novo </it>assemblies can be obtained with relatively few errors and gaps when the sequence read coverage redundancy is 15-fold or greater. Closing all the gaps in each genome sequence is time-consuming and costly; therefore, in the near future there will be an excess of draft bacterial sequences versus closed genomes in public databases. Our analysis strategy here melds both draft and complete genomes using consistent automated annotation that is scalable to encompass potentially much larger datasets. High quality draft sequencing is likely to shortly supersede comparative genome hybridization using microarrays <abbrgrp><abbr bid="B25">25</abbr><abbr bid="B58">58</abbr><abbr bid="B75">75</abbr><abbr bid="B76">76</abbr></abbrgrp> as the most popular strategy for genome-wide bacterial comparisons. Genome sequence datasets can be used to shed light on the novel functions in close relatives that may have been lost in the pathogen of interest, as well as orthologs in genomes that fall below the threshold for hybridization-based detection. The problems of using microarrays for comparisons of more diverse bacterial taxa are illustrated in a study of the <it>Yersinia </it>genus, using many of the strains sequenced in this work, where the estimated number of core genes was found to be only 292 <abbrgrp><abbr bid="B25">25</abbr></abbrgrp>.</p>
         <p>We cannot claim complete coverage of all the type strains of the <it>Yersinia </it>genus, as three new species have been created <abbrgrp><abbr bid="B77">77</abbr><abbr bid="B78">78</abbr><abbr bid="B79">79</abbr></abbrgrp> since our work began. Nonetheless, from this extensive genomic survey we have attempted to categorize the features that define <it>Yersinia</it>. The core of about 2,500 proteins present in all 11 species is not a subset of any other enterobacterial genome. Species of the <it>Y. enterocolitica </it>clade (Figure <figr fid="F1">1</figr>) have overall a similar array of protein functions and contain a number of conserved gene clusters (cobalamin, hydrogenases, ureases, and so on) found in other bacteria (<it>Helicobacter</it>, <it>Campylobacter</it>, <it>Salmonella</it>, <it>Escherichia coli</it>) that colonize the mammalian gut. <it>Y. pestis </it>has lost many of these genes by deletion or disruption since its split from the enteric pathogen <it>Y. pseudotuberculosis </it>and adoption of an insect vector-mediated pathogenicity mode. The smaller <it>Y. ruckeri </it>chromosome does not appear to result from recent reductive evolution (as is the case of <it>Y. pestis</it>), evidenced by the relatively low number of frameshifts and pseudogenes, and the normal amount of repetitive contigs in the <it>newbler </it>genome assembly. Like <it>Y. pestis</it>, <it>Y. ruckeri </it>lacks urease, methionine salvage genes, and B12-related metabolism. The prevailing consensus is that the pathway of transmission of red mouth disease in fish is gastrointestinal, yet the similarities of <it>Y. ruckeri </it>genome reduction to <it>Y. pestis </it>hint at an alternative mode of infection for <it>Y. ruckeri</it>.</p>
         <p>This comparative genomic study reaffirms that the distinguishing feature of the high-level mammalian pathogens is the acquisition of a particular set of mobile elements: HPI, the pYV, pMT1 and pPCP plasmids, and the YAPI island. However, the eight species sequenced in this study, believed to have either low or zero potential for human infection, contain numerous, apparently horizontally transferred genes that would be considered putative virulence determinants if discovered in the genome of a more serious pathogen. Two examples are yaldo0001_40900 (bile salt hydrolase) and yfred0001_36480, an ortholog of the TibA adhesin of enterotoxigenic <it>E. coli</it>. Bile salt hydrolase in pathogenic <it>Brucella abortus </it>has been shown to enhance bile resistance during oral mouse infections <abbrgrp><abbr bid="B80">80</abbr></abbrgrp> and the TibA adhesin forms a biofilm that mediates human cell invasion <abbrgrp><abbr bid="B81">81</abbr></abbrgrp>. The low-virulence species contain a similar (and in some cases greater) number of matches to known drug resistance mechanisms that have been curated in the Antibiotic Resistance Genes Database <abbrgrp><abbr bid="B82">82</abbr></abbrgrp> (Additional file <supplr sid="S23">23</supplr>, <abbrgrp><abbr bid="B83">83</abbr></abbrgrp>). Adding DNA, stirring and reducing <abbrgrp><abbr bid="B17">17</abbr></abbrgrp> is, therefore, the general recipe for <it>Yersinia </it>genome evolution rather than a formula specific to pathogens. Comparative genomic studies such as these can be used to enhance our ability to rapidly assess the virulence potential of a genome sequence of an emerging pathogen and we plan to continue to build more extensive databases of non-pathogenic <it>Yersinia </it>genomes that will allow us to draw conclusions with more statistical power than is possible with just 11 representative species.</p>
		<suppl id="S23">
            <title>
               <p>Additional file 23</p>
            </title>
            <caption>
               <p>Putative antibiotic resistance genes in the <it>Yersinia </it>genus determined using the Antibiotic Resistance Genes Database</p>
            </caption>
            <text>
               <p>Putative antibiotic resistance genes in the <it>Yersinia </it>genus determined using the Antibiotic Resistance Genes Database <abbrgrp><abbr bid="B45">45</abbr></abbrgrp>.</p>
            </text>
            <file name="gb-2010-11-1-r1-S23.xls">
   <p>Click here for file</p>
</file>
         </suppl> 
      </sec>
      <sec>
         <st>
            <p>Conclusions</p>
         </st>
         <p>Genomes of the 11 <it>Yersinia </it>species studied range in estimated size from 3.7 to 4.8 Mb. The nucleotide diversity (&#928;) of the conserved backbone based on large collinear conserved blocks was calculated to be 0.27. There were no orthologs of genes and predicted proteins in the virulence-associated plasmids pYV, pMT1, and pPla, and the HPI of <it>Y. pestis </it>in the genomes of the type strains - eight non- or low-pathogenic <it>Yersinia </it>species.</p>
         <p>Apart from functions encoded on the aforementioned plasmids, HPI and YAPI regions, only nine proteins detected as common to all three <it>Yersinia </it>pathogen species (<it>Y. pestis</it>, <it>Y. enterocolitica </it>and <it>Y. pseudotuberculosis</it>) were not found on at least one of the other eight species. Therefore, our study is in agreement with the hypothesis that genes acquired by recent horizontal transfer effectively define the members of the <it>Yersinia </it>genus virulent for humans.</p>
         <p>The core proteome of the 11 <it>Yersinia </it>species consists of approximately 2,500 proteins. <it>Yersinia </it>genomes had a similar global partition of protein functions, as measured by the distribution of COG families. Genome to genome variation in islands with genes encoding functions such as ureases, hydrogenases and B12 cofactor metabolite reactions may reflect adaptations to colonizing specific host habitats.</p>
         <p><it>Y. ruckeri</it>, a salmonid fish pathogen, is the earliest branching member of the genus and has the smallest genome (3.7 Mb). Like <it>Y. pestis</it>, <it>Y. ruckeri </it>lacks functional urease, methionine salvage genes, and B12-related metabolism. These losses may reflect adaptation to a lifestyle that does not include colonization of the mammalian gut.</p>
         <p>The absence of the YAPI island in any of the seven '<it>Y. enterocolitica </it>clade' genomes likely indicates that YAPI was acquired independently in <it>Y. enterocolitica </it>and <it>Y. pseudotuberculosis</it>.</p>
         <p>We identified 171 and 100 regions within the genomes of <it>Y. pestis </it>and <it>Y. enterocolitica</it>, respectively, that represented potential candidates for the design of nucleotide sequence-based assays for unique detection of each pathogen.</p>
      </sec>
      <sec>
         <st>
            <p>Materials and methods</p>
         </st>
         <sec>
            <st>
               <p>Bacterial strains</p>
            </st>
            <p>Type strains of the eight <it>Yersinia </it>species sequenced in this study (Table <tblr tid="T1">1</tblr>) were acquired from the American Type Culture Collection (ATCC) and propagated at 37&#176;C or 25&#176;C (<it>Y. ruckeri</it>) on Luria media. DNA for genome sequencing was prepared from overnight broth cultures propagated from single colonies streaked on a Luria agar plate using the Promega Wizard Maxiprep System (Promega, Madison, WI, USA).</p>
         </sec>
         <sec>
            <st>
               <p>Genome sequencing and assembly</p>
            </st>
            <p>Genomes were sequenced using the Genome Sequencer 20 Instrument (454 Life Sciences Inc., Branford, CT) <abbrgrp><abbr bid="B34">34</abbr></abbrgrp>. Libraries for sequencing were prepared from 5 &#956;g of genomic DNA. The sequencing reads for each project were assembled <it>de novo </it>using the <it>newbler </it>program (version 01.51.02; 454 Life Sciences Inc.).</p>
         </sec>
         <sec>
            <st>
               <p>Optical mapping</p>
            </st>
            <p>Optical maps <abbrgrp><abbr bid="B38">38</abbr></abbrgrp> for each genome using the restriction enzymes <it>Afl</it>II and <it>Nhe</it>I (<it>Y. aldovae </it>and <it>Y. kristensenii </it>only have maps for <it>Afl</it>II) were constructed by Opgen Inc. (Madison, WI). The <it>newbler </it>assemblies for each genome were scaffolded using the optical maps and the SOMA package <abbrgrp><abbr bid="B39">39</abbr></abbrgrp> (Additional file <supplr sid="S4">4</supplr>). Assemblies that did not align against the optical map were tested for high read coverage, unusual GC content, and good matches to plasmid-associated genes from the ACLAME database <abbrgrp><abbr bid="B84">84</abbr></abbrgrp> (BLAST E-value less than 10<sup>-20</sup>) to identify sequences that could potentially be part of an extrachromosomal element.</p>
         </sec>
         <sec>
            <st>
               <p>Detection of disrupted genes</p>
            </st>
            <p>We used two methods for detecting disrupted proteins. In the first method clustered protein groups were used to adduce evidence for possible gene disruption events. The clusters were parsed for pairs of proteins that met the following criteria: both from the same genome; encoded by genes located on the same strand with less than 200 bp separating their frames; and total length of the combined genes was not greater than 120% of the longest gene in the cluster. The second method used was the FSFIND algorithm <abbrgrp><abbr bid="B85">85</abbr></abbrgrp> with a standard bacterial gene model to compare the accumulation of predicted frameshifts across different genomes.</p>
         </sec>
         <sec>
            <st>
               <p>Assembly validation</p>
            </st>
            <p>In order to rule out artifacts due to undocumented features of the <it>newbler </it>assemblies, new assemblies were generated for validation purposes by re-mapping all the shotgun reads to the sequence of the assembled contigs using AMOScmp <abbrgrp><abbr bid="B86">86</abbr></abbrgrp>. The resulting assembly was then subjected to analysis using the <it>amosvalidate </it>package <abbrgrp><abbr bid="B37">37</abbr></abbrgrp>. The output of this program includes a list of genomic regions that contain inconsistencies highlighting possible misassemblies. The resulting regions were manually inspected to reduce the possibility of assembly errors. The regions flagged by the <it>amosvalidate </it>package are provided in GFF (general feature format), compatible with most genome browsers (Additional file <supplr sid="S3">3</supplr>).</p>
         </sec>
         <sec>
            <st>
               <p>Insertion sequences and <it>de novo </it>repeat finding</p>
            </st>
            <p>The presence of repeats is known to confound assembly programs and the <it>newbler </it>assembler is known to collapse high-fidelity repeat instances into a single contig. To account for the possibility of such misassemblies, we computed the copy number of contigs based on coverage statistics and used this information to correct our estimates for the abundance of classes of repeats (Additional file 3). To find known insertion sequences, the genomes were scanned for matches using the IS finder web service <abbrgrp><abbr bid="B40">40</abbr></abbrgrp> with a BLAST E-value threshold of 10<sup>-10 </sup>(matches to known repeat contigs were counted as multiple matches based on the coverage of the contig). In addition, we searched for common repeat sequences in the genome using the RepeatScout program <abbrgrp><abbr bid="B41">41</abbr></abbrgrp> after duplicating known repeat contigs. The repeats found in each genome were collected (64 sequences) and transformed into a non-redundant set of 44 sequences using the CD-HIT program <abbrgrp><abbr bid="B87">87</abbr></abbrgrp> (Additional file <supplr sid="S6">6</supplr>). The repeats found were then searched against all the genomes using BLAST with an E-value threshold of 10<sup>-10 </sup>to record matches. The resultant figures for repeat content are estimations that may be lower than the true number found in the genomes.</p>
         </sec>
         <sec>
            <st>
               <p>Finding unique DNA signatures in <it>Y. pestis </it>and <it>Y. enterocolitica</it></p>
            </st>
            <p>DNA signatures for the <it>Y. pestis </it>and the <it>Y. enterocolitica </it>genomes were identified using the Insignia pipeline <abbrgrp><abbr bid="B44">44</abbr></abbrgrp>. Signatures of 100 bp or longer were considered good candidates for the design of detection assays. These signatures were then compared with the genomes of the <it>Yersinia </it>strains sequenced during the current study using the MUMmer package <abbrgrp><abbr bid="B88">88</abbr></abbrgrp> with default parameters. Signatures that matched by more than 40 bp were deemed invalidated, as they would likely lead to false-positive results.</p>
         </sec>
         <sec>
            <st>
               <p>Automated annotation</p>
            </st>
            <p>We used DIYA <abbrgrp><abbr bid="B89">89</abbr></abbrgrp> for automated annotation, which is a pipeline for integrating bacterial analysis tools. Using DIYA, the assemblies generated by <it>newbler </it>were scaffolded based on the optical map, concatenated, and used as a template for the programs GLIMMER <abbrgrp><abbr bid="B90">90</abbr></abbrgrp>, tRNASCAN-SE <abbrgrp><abbr bid="B91">91</abbr></abbrgrp>, and RNAmmer <abbrgrp><abbr bid="B92">92</abbr></abbrgrp> for prediction of open reading frames and RNA genes, respectively. All predicted proteins encoded by each coding sequence were compared against a database of all proteins predicted from the canonical annotation of <it>Y. pestis </it>CO92 <abbrgrp><abbr bid="B9">9</abbr></abbrgrp> as a preliminary screen for potentially novel functions. The GenBank format files created from the eight genomes sequenced in this study were combined with other DIYA-annotated, published whole genomes to form a dataset for analysis. All proteins were searched against the UniRef50 database (July 2008) <abbrgrp><abbr bid="B93">93</abbr></abbrgrp> using BLASTP <abbrgrp><abbr bid="B94">94</abbr></abbrgrp> and against the Conserved Domain Database <abbrgrp><abbr bid="B95">95</abbr></abbrgrp> using RPSBLAST <abbrgrp><abbr bid="B96">96</abbr></abbrgrp> with an E-value threshold of 10<sup>-10 </sup>to record matches.</p>
         </sec>
         <sec>
            <st>
               <p>Database accession numbers</p>
            </st>
            <p>The annotated genome data were submitted to NCBI GenBank and the sequence data submitted to the NCBI Short Read Archive (SRA). The accession numbers are: <it>Y. rohdei</it>, ATCC_43380: [Genbank:<ext-link ext-link-id="ACCD00000000" ext-link-type="gen">ACCD00000000</ext-link>]/[SRA:SRA009766.1]; <it>Y. ruckeri </it>ATCC_29473: [Genbank:<ext-link ext-link-id="ACCC00000000" ext-link-type="gen">ACCC00000000</ext-link>]/[SRA:SRA009767.1]; <it>Y. aldovae </it>ATCC_35236: [Genbank:<ext-link ext-link-id="ACCB00000000" ext-link-type="gen">ACCB00000000</ext-link>]/[SRA:SRA009760.1]; <it>Y. kristensenii </it>ATCC_33638: [Genbank:<ext-link ext-link-id="ACCA00000000" ext-link-type="gen">ACCA00000000</ext-link>]/[SRA:SRA009764.1]; <it>Y. intermedia </it>ATCC_29909: [Genbank:<ext-link ext-link-id="AALF00000000" ext-link-type="gen">AALF00000000</ext-link>]/[SRA:SRA009763.1]; <it>Y. frederiksenii </it>ATCC_33641: [Genbank: <ext-link ext-link-id="AALE00000000" ext-link-type="gen">AALE00000000</ext-link>]/[SRA:SRA009762.1]; <it>Y. mollaretii </it>ATCC_43969: [Genbank:<ext-link ext-link-id="AALD00000000" ext-link-type="gen">AALD00000000</ext-link>]/[SRA:SRA009765.1]; <it>Y. bercovieri </it>ATCC_43970: [Genbank:<ext-link ext-link-id="AALC00000000" ext-link-type="gen">AALC00000000</ext-link>]/[SRA:SRA009761.1].</p>
         </sec>
         <sec>
            <st>
               <p>Whole-genome alignment using MAUVE</p>
            </st>
            <p><it>Yersinia </it>genomes were aligned using the standard MAUVE <abbrgrp><abbr bid="B46">46</abbr></abbrgrp> algorithm with default settings. A cutoff of 1,500 bp was set as the minimum LCB length. LCBs for each genome were extracted from the output of the program and concatenated. From the alignment nucleotide diversity was calculated by an in-house script using positions where there was a base in all 11 genomes. Because of the size of the dataset, the calculated value of &#928; is very robust in terms of sequence error. We calculated that 112,696 nucleotides of sequence in the concatenated core would have to be wrong to alter the estimation of &#928; by &#177; 5% (Additional file <supplr sid="S24">24</supplr>). PHYLIP <abbrgrp><abbr bid="B51">51</abbr></abbrgrp> programs were used to build a consensus tree of the MAUVE alignment with bootstrapping 1,000 replicates. The underlying model for each replicate was Fitch-Margoliash. The final phylogeny was resolved according to the majority consensus rule.</p>
         <suppl id="S24">
            <title>
               <p>Additional file 24</p>
            </title>
            <caption>
               <p>Calculations for the estimation of &#928; from aligned <it>Yersinia </it>core genomes</p>
            </caption>
            <text>
               <p>Calculations for the estimation of &#928; from aligned <it>Yersinia </it>core genomes.</p>
            </text>
            <file name="gb-2010-11-1-r1-S24.doc">
   <p>Click here for file</p>
</file>
         </suppl>
		 </sec>
         <sec>
            <st>
               <p>Clustering protein orthologs</p>
            </st>
            <p>The complete predicted proteome from all genomes annotated in this study was searched against itself using BLASTP with default parameters. We removed short, spurious, and non-homologous hits by setting a bitscore/alignment length filtering threshold of 0.4 and minimum protein length of 30. Predicted proteins passing this filter were clustered into families based on these normalized distances using the MCL algorithm <abbrgrp><abbr bid="B48">48</abbr></abbrgrp> with an inflation parameter value of 4. These parameters were based on an investigation of clustering 12 completed <it>E. coli </it>genomes, which produced very similar results to a previous study <abbrgrp><abbr bid="B42">42</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Whole genome phylogenetic reconstruction</p>
            </st>
            <p>From the results of clustering analysis, 681 proteins were found that had exactly one member in each of the genomes and the length of each protein in the cluster was nearly identical. These protein sequences were aligned using ClustalW <abbrgrp><abbr bid="B50">50</abbr></abbrgrp>, and individual gene alignments were concatenated into a string of 170,940 amino acids for each genome. Uninformative characters were removed from the dataset using Gblocks <abbrgrp><abbr bid="B97">97</abbr></abbrgrp> and a phylogeny reconstructed with PHYLIP <abbrgrp><abbr bid="B51">51</abbr></abbrgrp> under a neighbor-joining model. To evaluate node support, a majority rule-consensus tree of 1,000 bootstrap replicates was computed.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Abbreviations</p>
         </st>
         <p>ATCC: American Type Culture Collection; COG: Cluster of Orthologous Groups; HPI: high-pathogenicity island; IS: insertion sequence; LCB: locally collinear block; SRA: Short Read Archive; TTSS: type III secretion system; YAPI: <it>Y. pseudotuberculosis </it>adhesion pathogenicity island.</p>
      </sec>
      <sec>
         <st>
            <p>Authors' contributions</p>
         </st>
         <p>TDR, MEZ, LD, and SS were involved in study design. AS and AM were involved in materials. LD, MPKT, SL, and NNo were involved in 454 sequencing. SS, MPKT, and CC were involved in additional experiments. PEC, TDR, CC, MEZ, ACS, NN, MP, BT, and DDS were involved in data analysis. TDR, MP, and NN wrote the paper.</p>
      </sec>
   </bdy>
   <bm>
      <ack>
         <sec>
            <st>
               <p>Acknowledgements</p>
            </st>
            <p>We would like to thank Ayra Akmal, Kim Bishop-Lilly, Mike Cariaso, Brian Osborne, Bill Klimke, Tim Welch, Jennifer Tsai, Cheryl Timms Strauss and members of the 454 Service Center for their help and advice in completing this manuscript. This work was supported by grant TMTI0068_07_NM_T from the Joint Science and Technology Office for Chemical and Biological Defense (JSTO-CBD), Defense Threat Reduction Agency Initiative to TDR. The views expressed in this article are those of the authors and do not necessarily reflect the official policy or position of the US Department of the Navy, US Department of Defense, or the US Government. Some of the authors are employees of the US Government, and this work was prepared as part of their official duties. Title 17 USC &#167;105 provides that 'Copyright protection under this title is not available for any work of the United States Government.' Title 17 USC &#167;101 defines a US Government work as a work prepared by a military service member or employee of the US Government as part of that person's official duties.</p>
         </sec>
      </ack>
      <refgrp><bibl id="B1"><title><p>The Microbial Rosetta Stone Database: a compilation of global and emerging infectious microorganisms and bioterrorist threat agents.</p></title><aug><au><snm>Ecker</snm><fnm>DJ</fnm></au><au><snm>Sampath</snm><fnm>R</fnm></au><au><snm>Willett</snm><fnm>P</fnm></au><au><snm>Wyatt</snm><fnm>JR</fnm></au><au><snm>Samant</snm><fnm>V</fnm></au><au><snm>Massire</snm><fnm>C</fnm></au><au><snm>Hall</snm><fnm>TA</fnm></au><au><snm>Hari</snm><fnm>K</fnm></au><au><snm>McNeil</snm><fnm>JA</fnm></au><au><snm>Buchen-Osmond</snm><fnm>C</fnm></au><au><snm>Budowle</snm><fnm>B</fnm></au></aug><source>BMC Microbiol</source><pubdate>2005</pubdate><volume>5</volume><fpage>19</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1186/1471-2180-5-19</pubid><pubid idtype="pmcid">1127111</pubid><pubid idtype="pmpid" link="fulltext">15850481</pubid></pubidlist></xrefbib></bibl><bibl id="B2"><title><p>Yersinia pestis, the cause of plague, is a recently emerged clone of Yersinia pseudotuberculosis.</p></title><aug><au><snm>Achtman</snm><fnm>M</fnm></au><au><snm>Zurth</snm><fnm>K</fnm></au><au><snm>Morelli</snm><fnm>G</fnm></au><au><snm>Torrea</snm><fnm>G</fnm></au><au><snm>Guiyoule</snm><fnm>A</fnm></au><au><snm>Carniel</snm><fnm>E</fnm></au></aug><source>Proc Natl Acad Sci USA</source><pubdate>1999</pubdate><volume>96</volume><fpage>14043</fpage><lpage>14048</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1073/pnas.96.24.14043</pubid><pubid idtype="pmcid">24187</pubid><pubid idtype="pmpid" link="fulltext">10570195</pubid></pubidlist></xrefbib></bibl><bibl id="B3"><title><p>Molecular mechanisms of pathogenicity: how do pathogenic microorganisms develop cross-kingdom host jumps?</p></title><aug><au><snm>van Baarlen</snm><fnm>P</fnm></au><au><snm>van Belkum</snm><fnm>A</fnm></au><au><snm>Summerbell</snm><fnm>RC</fnm></au><au><snm>Crous</snm><fnm>PW</fnm></au><au><snm>Thomma</snm><fnm>BP</fnm></au></aug><source>FEMS Microbiol 
Rev</source><pubdate>2007</pubdate><volume>31</volume><fpage>239</fpage><lpage>277</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1111/j.1574-6976.2007.00065.x</pubid><pubid idtype="pmpid" link="fulltext">17326816</pubid></pubidlist></xrefbib></bibl><bibl id="B4"><title><p>Global Genetic Population Structure of Bacillus anthracis.</p></title><aug><au><snm>Van Ert</snm><fnm>MN</fnm></au><au><snm>Easterday</snm><fnm>WR</fnm></au><au><snm>Huynh</snm><fnm>LY</fnm></au><au><snm>Okinaka</snm><fnm>RT</fnm></au><au><snm>Hugh-Jones</snm><fnm>ME</fnm></au><au><snm>Ravel</snm><fnm>J</fnm></au><au><snm>Zanecki</snm><fnm>SR</fnm></au><au><snm>Pearson</snm><fnm>T</fnm></au><au><snm>Simonson</snm><fnm>TS</fnm></au><au><snm>U'Ren</snm><fnm>JM</fnm></au><au><snm>Kachur</snm><fnm>SM</fnm></au><au><snm>Leadem-Dougherty</snm><fnm>RR</fnm></au><au><snm>Rhoton</snm><fnm>SD</fnm></au><au><snm>Zinser</snm><fnm>G</fnm></au><au><snm>Farlow</snm><fnm>J</fnm></au><au><snm>Coker</snm><fnm>PR</fnm></au><au><snm>Smith</snm><fnm>KL</fnm></au><au><snm>Wang</snm><fnm>B</fnm></au><au><snm>Kenefic</snm><fnm>LJ</fnm></au><au><snm>Fraser-Liggett</snm><fnm>CM</fnm></au><au><snm>Wagner</snm><fnm>DM</fnm></au><au><snm>Keim</snm><fnm>P</fnm></au></aug><source>PLoS ONE</source><pubdate>2007</pubdate><volume>2</volume><fpage>e461</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1371/journal.pone.0000461</pubid><pubid idtype="pmcid">1866244</pubid><pubid idtype="pmpid" link="fulltext">17520020</pubid></pubidlist></xrefbib></bibl><bibl id="B5"><title><p>Microarray-based resequencing of multiple Bacillus anthracis isolates.</p></title><aug><au><snm>Zwick</snm><fnm>ME</fnm></au><au><snm>McAfee</snm><fnm>F</fnm></au><au><snm>Cutler</snm><fnm>DJ</fnm></au><au><snm>Read</snm><fnm>TD</fnm></au><au><snm>Ravel</snm><fnm>J</fnm></au><au><snm>Bowman</snm><fnm>GR</fnm></au><au><snm>Galloway</snm><fnm>DR</fnm></au><au><snm>Mateczun</snm><fnm>A</fnm></au></aug><source>Genome 
Biol</source><pubdate>2005</pubdate><volume>6</volume><fpage>R10</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1186/gb-2004-6-1-r10</pubid><pubid idtype="pmcid">549062</pubid><pubid idtype="pmpid" link="fulltext">15642093</pubid></pubidlist></xrefbib></bibl><bibl id="B6"><title><p>Genomic fluidity and pathogenic bacteria: applications in diagnostics, epidemiology and intervention.</p></title><aug><au><snm>Ahmed</snm><fnm>N</fnm></au><au><snm>Dobrindt</snm><fnm>U</fnm></au><au><snm>Hacker</snm><fnm>J</fnm></au><au><snm>Hasnain</snm><fnm>SE</fnm></au></aug><source>Nat Rev Microbiol</source><pubdate>2008</pubdate><volume>6</volume><fpage>387</fpage><lpage>394</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1038/nrmicro1889</pubid><pubid idtype="pmpid" link="fulltext">18392032</pubid></pubidlist></xrefbib></bibl><bibl id="B7"><title><p>The impact of next-generation sequencing technology on genetics.</p></title><aug><au><snm>Mardis</snm><fnm>ER</fnm></au></aug><source>Trends Genet</source><pubdate>2008</pubdate><volume>24</volume><fpage>133</fpage><lpage>141</lpage><xrefbib><pubid idtype="pmpid" link="fulltext">18262675</pubid></xrefbib></bibl><bibl id="B8"><title><p>Next-generation DNA sequencing.</p></title><aug><au><snm>Shendure</snm><fnm>J</fnm></au><au><snm>Ji</snm><fnm>H</fnm></au></aug><source>Nat Biotechnol</source><pubdate>2008</pubdate><volume>26</volume><fpage>1135</fpage><lpage>1145</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1038/nbt1486</pubid><pubid idtype="pmpid" link="fulltext">18846087</pubid></pubidlist></xrefbib></bibl><bibl id="B9"><title><p>Genome sequence of Yersinia pestis, the causative agent of 
plague.</p></title><aug><au><snm>Parkhill</snm><fnm>J</fnm></au><au><snm>Wren</snm><fnm>BW</fnm></au><au><snm>Thomson</snm><fnm>NR</fnm></au><au><snm>Titball</snm><fnm>RW</fnm></au><au><snm>Holden</snm><fnm>MT</fnm></au><au><snm>Prentice</snm><fnm>MB</fnm></au><au><snm>Sebaihia</snm><fnm>M</fnm></au><au><snm>James</snm><fnm>KD</fnm></au><au><snm>Churcher</snm><fnm>C</fnm></au><au><snm>Mungall</snm><fnm>KL</fnm></au><au><snm>Baker</snm><fnm>S</fnm></au><au><snm>Basham</snm><fnm>D</fnm></au><au><snm>Bentley</snm><fnm>SD</fnm></au><au><snm>Brooks</snm><fnm>K</fnm></au><au><snm>Cerde&#241;o-T&#225;rraga</snm><fnm>AM</fnm></au><au><snm>Chillingworth</snm><fnm>T</fnm></au><au><snm>Cronin</snm><fnm>A</fnm></au><au><snm>Davies</snm><fnm>RM</fnm></au><au><snm>Davis</snm><fnm>P</fnm></au><au><snm>Dougan</snm><fnm>G</fnm></au><au><snm>Feltwell</snm><fnm>T</fnm></au><au><snm>Hamlin</snm><fnm>N</fnm></au><au><snm>Holroyd</snm><fnm>S</fnm></au><au><snm>Jagels</snm><fnm>K</fnm></au><au><snm>Karlyshev</snm><fnm>AV</fnm></au><au><snm>Leather</snm><fnm>S</fnm></au><au><snm>Moule</snm><fnm>S</fnm></au><au><snm>Oyston</snm><fnm>PC</fnm></au><au><snm>Quail</snm><fnm>M</fnm></au><au><snm>Rutherford</snm><fnm>K</fnm></au><etal/></aug><source>Nature</source><pubdate>2001</pubdate><volume>413</volume><fpage>523</fpage><lpage>527</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1038/35097083</pubid><pubid idtype="pmpid" link="fulltext">11586360</pubid></pubidlist></xrefbib></bibl><bibl id="B10"><title><p>Genome sequence of Yersinia pestis 
KIM.</p></title><aug><au><snm>Deng</snm><fnm>W</fnm></au><au><snm>Burland</snm><fnm>V</fnm></au><au><snm>Plunkett</snm><fnm>G</fnm></au><au><snm>Boutin</snm><fnm>A</fnm></au><au><snm>Mayhew</snm><fnm>GF</fnm></au><au><snm>Liss</snm><fnm>P</fnm></au><au><snm>Perna</snm><fnm>NT</fnm></au><au><snm>Rose</snm><fnm>DJ</fnm></au><au><snm>Mau</snm><fnm>B</fnm></au><au><snm>Zhou</snm><fnm>S</fnm></au><au><snm>Schwartz</snm><fnm>DC</fnm></au><au><snm>Fetherston</snm><fnm>JD</fnm></au><au><snm>Lindler</snm><fnm>LE</fnm></au><au><snm>Brubaker</snm><fnm>RR</fnm></au><au><snm>Plano</snm><fnm>GV</fnm></au><au><snm>Straley</snm><fnm>SC</fnm></au><au><snm>McDonough</snm><fnm>KA</fnm></au><au><snm>Nilles</snm><fnm>ML</fnm></au><au><snm>Matson</snm><fnm>JS</fnm></au><au><snm>Blattner</snm><fnm>FR</fnm></au><au><snm>Perry</snm><fnm>RD</fnm></au></aug><source>J Bacteriol</source><pubdate>2002</pubdate><volume>184</volume><fpage>4601</fpage><lpage>4611</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1128/JB.184.16.4601-4611.2002</pubid><pubid idtype="pmcid">135232</pubid><pubid idtype="pmpid" link="fulltext">12142430</pubid></pubidlist></xrefbib></bibl><bibl id="B11"><title><p>Complete genome sequence of Yersinia pestis strain 91001, an isolate avirulent to 
humans.</p></title><aug><au><snm>Song</snm><fnm>Y</fnm></au><au><snm>Tong</snm><fnm>Z</fnm></au><au><snm>Wang</snm><fnm>J</fnm></au><au><snm>Wang</snm><fnm>L</fnm></au><au><snm>Guo</snm><fnm>Z</fnm></au><au><snm>Han</snm><fnm>Y</fnm></au><au><snm>Zhang</snm><fnm>J</fnm></au><au><snm>Pei</snm><fnm>D</fnm></au><au><snm>Zhou</snm><fnm>D</fnm></au><au><snm>Qin</snm><fnm>H</fnm></au><au><snm>Pang</snm><fnm>X</fnm></au><au><snm>Han</snm><fnm>Y</fnm></au><au><snm>Zhai</snm><fnm>J</fnm></au><au><snm>Li</snm><fnm>M</fnm></au><au><snm>Cui</snm><fnm>B</fnm></au><au><snm>Qi</snm><fnm>Z</fnm></au><au><snm>Jin</snm><fnm>L</fnm></au><au><snm>Dai</snm><fnm>R</fnm></au><au><snm>Chen</snm><fnm>F</fnm></au><au><snm>Li</snm><fnm>S</fnm></au><au><snm>Ye</snm><fnm>C</fnm></au><au><snm>Du</snm><fnm>Z</fnm></au><au><snm>Lin</snm><fnm>W</fnm></au><au><snm>Wang</snm><fnm>J</fnm></au><au><snm>Yu</snm><fnm>J</fnm></au><au><snm>Yang</snm><fnm>H</fnm></au><au><snm>Wang</snm><fnm>J</fnm></au><au><snm>Huang</snm><fnm>P</fnm></au><au><snm>Yang</snm><fnm>R</fnm></au></aug><source>DNA Res</source><pubdate>2004</pubdate><volume>11</volume><fpage>179</fpage><lpage>197</lpage><xrefbib><pubid idtype="doi">10.1093/dnares/11.3.179</pubid></xrefbib></bibl><bibl id="B12"><title><p>Complete genome sequence of Yersinia pestis strains Antiqua and Nepal516: evidence of gene reduction in an emerging pathogen.</p></title><aug><au><snm>Chain</snm><fnm>PS</fnm></au><au><snm>Hu</snm><fnm>P</fnm></au><au><snm>Malfatti</snm><fnm>SA</fnm></au><au><snm>Radnedge</snm><fnm>L</fnm></au><au><snm>Larimer</snm><fnm>F</fnm></au><au><snm>Vergez</snm><fnm>LM</fnm></au><au><snm>Worsham</snm><fnm>P</fnm></au><au><snm>Chu</snm><fnm>MC</fnm></au><au><snm>Andersen</snm><fnm>GL</fnm></au></aug><source>J Bacteriol</source><pubdate>2006</pubdate><volume>188</volume><fpage>4453</fpage><lpage>4463</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1128/JB.00124-06</pubid><pubid idtype="pmcid">1482938</pubid><pubid idtype="pmpid" 
link="fulltext">16740952</pubid></pubidlist></xrefbib></bibl><bibl id="B13"><title><p>Insights into the evolution of Yersinia pestis through whole-genome comparison with Yersinia pseudotuberculosis.</p></title><aug><au><snm>Chain</snm><fnm>PS</fnm></au><au><snm>Carniel</snm><fnm>E</fnm></au><au><snm>Larimer</snm><fnm>FW</fnm></au><au><snm>Lamerdin</snm><fnm>J</fnm></au><au><snm>Stoutland</snm><fnm>PO</fnm></au><au><snm>Regala</snm><fnm>WM</fnm></au><au><snm>Georgescu</snm><fnm>AM</fnm></au><au><snm>Vergez</snm><fnm>LM</fnm></au><au><snm>Land</snm><fnm>ML</fnm></au><au><snm>Motin</snm><fnm>VL</fnm></au><au><snm>Brubaker</snm><fnm>RR</fnm></au><au><snm>Fowler</snm><fnm>J</fnm></au><au><snm>Hinnebusch</snm><fnm>J</fnm></au><au><snm>Marceau</snm><fnm>M</fnm></au><au><snm>Medigue</snm><fnm>C</fnm></au><au><snm>Simonet</snm><fnm>M</fnm></au><au><snm>Chenal-Francisque</snm><fnm>V</fnm></au><au><snm>Souza</snm><fnm>B</fnm></au><au><snm>Dacheux</snm><fnm>D</fnm></au><au><snm>Elliott</snm><fnm>JM</fnm></au><au><snm>Derbise</snm><fnm>A</fnm></au><au><snm>Hauser</snm><fnm>LJ</fnm></au><au><snm>Garcia</snm><fnm>E</fnm></au></aug><source>Proc Natl Acad Sci USA</source><pubdate>2004</pubdate><volume>101</volume><fpage>13826</fpage><lpage>13831</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1073/pnas.0404012101</pubid><pubid idtype="pmcid">518763</pubid><pubid idtype="pmpid" link="fulltext">15358858</pubid></pubidlist></xrefbib></bibl><bibl id="B14"><title><p>The complete genome sequence of Yersinia pseudotuberculosis IP31758, the causative agent of Far East scarlet-like fever.</p></title><aug><au><snm>Eppinger</snm><fnm>M</fnm></au><au><snm>Rosovitz</snm><fnm>MJ</fnm></au><au><snm>Fricke</snm><fnm>WF</fnm></au><au><snm>Rasko</snm><fnm>DA</fnm></au><au><snm>Kokorina</snm><fnm>G</fnm></au><au><snm>Fayolle</snm><fnm>C</fnm></au><au><snm>Lindler</snm><fnm>LE</fnm></au><au><snm>Carniel</snm><fnm>E</fnm></au><au><snm>Ravel</snm><fnm>J</fnm></au></aug><source>PLoS 
Genet</source><pubdate>2007</pubdate><volume>3</volume><fpage>e142</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1371/journal.pgen.0030142</pubid><pubid idtype="pmcid">1959361</pubid><pubid idtype="pmpid" link="fulltext">17784789</pubid></pubidlist></xrefbib></bibl><bibl id="B15"><title><p>The Complete Genome Sequence and Comparative Genome Analysis of the High Pathogenicity Yersinia enterocolitica Strain 8081.</p></title><aug><au><snm>Thomson</snm><fnm>NR</fnm></au><au><snm>Howard</snm><fnm>S</fnm></au><au><snm>Wren</snm><fnm>BW</fnm></au><au><snm>Holden</snm><fnm>MT</fnm></au><au><snm>Crossman</snm><fnm>L</fnm></au><au><snm>Challis</snm><fnm>GL</fnm></au><au><snm>Churcher</snm><fnm>C</fnm></au><au><snm>Mungall</snm><fnm>K</fnm></au><au><snm>Brooks</snm><fnm>K</fnm></au><au><snm>Chillingworth</snm><fnm>T</fnm></au><au><snm>Feltwell</snm><fnm>T</fnm></au><au><snm>Abdellah</snm><fnm>Z</fnm></au><au><snm>Hauser</snm><fnm>H</fnm></au><au><snm>Jagels</snm><fnm>K</fnm></au><au><snm>Maddison</snm><fnm>M</fnm></au><au><snm>Moule</snm><fnm>S</fnm></au><au><snm>Sanders</snm><fnm>M</fnm></au><au><snm>Whitehead</snm><fnm>S</fnm></au><au><snm>Quail</snm><fnm>MA</fnm></au><au><snm>Dougan</snm><fnm>G</fnm></au><au><snm>Parkhill</snm><fnm>J</fnm></au><au><snm>Prentice</snm><fnm>MB</fnm></au></aug><source>PLoS Genet</source><pubdate>2006</pubdate><volume>2</volume><fpage>e206</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1371/journal.pgen.0020206</pubid><pubid idtype="pmcid">1698947</pubid><pubid idtype="pmpid" link="fulltext">17173484</pubid></pubidlist></xrefbib></bibl><bibl id="B16"><title><p>Yersinia pestis and the plague.</p></title><aug><au><snm>Rollins</snm><fnm>SE</fnm></au><au><snm>Rollins</snm><fnm>SM</fnm></au><au><snm>Ryan</snm><fnm>ET</fnm></au></aug><source>Am J Clin Pathol</source><pubdate>2003</pubdate><volume>119</volume><issue>Suppl</issue><fpage>S78</fpage><lpage>85</lpage><xrefbib><pubid idtype="pmpid">12951845</pubid></xrefbib></bibl><bibl 
id="B17"><title><p>The yersiniae--a model genus to study the rapid evolution of bacterial pathogens.</p></title><aug><au><snm>Wren</snm><fnm>BW</fnm></au></aug><source>Nat Rev Microbiol</source><pubdate>2003</pubdate><volume>1</volume><fpage>55</fpage><lpage>64</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1038/nrmicro730</pubid><pubid idtype="pmpid" link="fulltext">15040180</pubid></pubidlist></xrefbib></bibl><bibl id="B18"><title><p>The Yersinia Ysc-Yop virulence apparatus.</p></title><aug><au><snm>Cornelis</snm><fnm>GR</fnm></au></aug><source>Int J Med Microbiol</source><pubdate>2002</pubdate><volume>291</volume><fpage>455</fpage><lpage>462</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1078/1438-4221-00153</pubid><pubid idtype="pmpid">11890544</pubid></pubidlist></xrefbib></bibl><bibl id="B19"><title><p><it>Yersinia </it>effectors target mammalian signaling pathways.</p></title><aug><au><snm>Juris</snm><fnm>SJ</fnm></au><au><snm>Shao</snm><fnm>F</fnm></au><au><snm>Dixon</snm><fnm>JE</fnm></au></aug><source>Cell Microbiol</source><pubdate>2002</pubdate><volume>4</volume><fpage>201</fpage><lpage>211</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1046/j.1462-5822.2002.00182.x</pubid><pubid idtype="pmpid" link="fulltext">11952637</pubid></pubidlist></xrefbib></bibl><bibl id="B20"><title><p><it>Yersinia </it>outer proteins: role in modulation of host cell signaling responses and pathogenesis.</p></title><aug><au><snm>Viboud</snm><fnm>GI</fnm></au><au><snm>Bliska</snm><fnm>JB</fnm></au></aug><source>Annu Rev Microbiol</source><pubdate>2005</pubdate><volume>59</volume><fpage>69</fpage><lpage>89</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1146/annurev.micro.59.030804.121320</pubid><pubid idtype="pmpid" link="fulltext">15847602</pubid></pubidlist></xrefbib></bibl><bibl id="B21"><title><p>The Yersinia high-pathogenicity island (HPI): evolutionary and functional 
aspects.</p></title><aug><au><snm>Schubert</snm><fnm>S</fnm></au><au><snm>Rakin</snm><fnm>A</fnm></au><au><snm>Heesemann</snm><fnm>J</fnm></au></aug><source>Int J Med Microbiol</source><pubdate>2004</pubdate><volume>294</volume><fpage>83</fpage><lpage>94</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1016/j.ijmm.2004.06.026</pubid><pubid idtype="pmpid">15493818</pubid></pubidlist></xrefbib></bibl><bibl id="B22"><title><p>The Yersinia high-pathogenicity island: an iron-uptake island.</p></title><aug><au><snm>Carniel</snm><fnm>E</fnm></au></aug><source>Microbes Infect</source><pubdate>2001</pubdate><volume>3</volume><fpage>561</fpage><lpage>569</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1016/S1286-4579(01)01412-5</pubid><pubid idtype="pmpid" link="fulltext">11418330</pubid></pubidlist></xrefbib></bibl><bibl id="B23"><title><p>Dynamics of genome rearrangement in bacterial populations.</p></title><aug><au><snm>Darling</snm><fnm>AE</fnm></au><au><snm>Miklos</snm><fnm>I</fnm></au><au><snm>Ragan</snm><fnm>MA</fnm></au></aug><source>PLoS Genet</source><pubdate>2008</pubdate><volume>4</volume><fpage>e1000128</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1371/journal.pgen.1000128</pubid><pubid idtype="pmcid">2483231</pubid><pubid idtype="pmpid" link="fulltext">18650965</pubid></pubidlist></xrefbib></bibl><bibl id="B24"><title><p>Intraspecific diversity of Yersinia pestis.</p></title><aug><au><snm>Anisimov</snm><fnm>AP</fnm></au><au><snm>Lindler</snm><fnm>LE</fnm></au><au><snm>Pier</snm><fnm>GB</fnm></au></aug><source>Clin Microbiol Rev</source><pubdate>2004</pubdate><volume>17</volume><fpage>434</fpage><lpage>464</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1128/CMR.17.2.434-464.2004</pubid><pubid idtype="pmcid">387406</pubid><pubid idtype="pmpid" link="fulltext">15084509</pubid></pubidlist></xrefbib></bibl><bibl id="B25"><title><p>Yersinia genome diversity disclosed by Yersinia pestis genome-wide DNA 
microarray.</p></title><aug><au><snm>Wang</snm><fnm>X</fnm></au><au><snm>Han</snm><fnm>Y</fnm></au><au><snm>Li</snm><fnm>Y</fnm></au><au><snm>Guo</snm><fnm>Z</fnm></au><au><snm>Song</snm><fnm>Y</fnm></au><au><snm>Tan</snm><fnm>Y</fnm></au><au><snm>Du</snm><fnm>Z</fnm></au><au><snm>Rakin</snm><fnm>A</fnm></au><au><snm>Zhou</snm><fnm>D</fnm></au><au><snm>Yang</snm><fnm>R</fnm></au></aug><source>Can J Microbiol</source><pubdate>2007</pubdate><volume>53</volume><fpage>1211</fpage><lpage>1221</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1139/W07-087</pubid><pubid idtype="pmpid" link="fulltext">18026215</pubid></pubidlist></xrefbib></bibl><bibl id="B26"><title><p>Multiple antimicrobial resistance in plague: an emerging public health risk.</p></title><aug><au><snm>Welch</snm><fnm>TJ</fnm></au><au><snm>Fricke</snm><fnm>WF</fnm></au><au><snm>McDermott</snm><fnm>PF</fnm></au><au><snm>White</snm><fnm>DG</fnm></au><au><snm>Rosso</snm><fnm>ML</fnm></au><au><snm>Rasko</snm><fnm>DA</fnm></au><au><snm>Mammel</snm><fnm>MK</fnm></au><au><snm>Eppinger</snm><fnm>M</fnm></au><au><snm>Rosovitz</snm><fnm>MJ</fnm></au><au><snm>Wagner</snm><fnm>D</fnm></au><au><snm>Rahalison</snm><fnm>L</fnm></au><au><snm>Leclerc</snm><fnm>JE</fnm></au><au><snm>Hinshaw</snm><fnm>JM</fnm></au><au><snm>Lindler</snm><fnm>LE</fnm></au><au><snm>Cebula</snm><fnm>TA</fnm></au><au><snm>Carniel</snm><fnm>E</fnm></au><au><snm>Ravel</snm><fnm>J</fnm></au></aug><source>PLoS ONE</source><pubdate>2007</pubdate><volume>2</volume><fpage>e309</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1371/journal.pone.0000309</pubid><pubid idtype="pmcid">1819562</pubid><pubid idtype="pmpid" link="fulltext">17375195</pubid></pubidlist></xrefbib></bibl><bibl id="B27"><title><p>A horizontally acquired filamentous phage contributes to the pathogenicity of the plague 
bacillus.</p></title><aug><au><snm>Derbise</snm><fnm>A</fnm></au><au><snm>Chenal-Francisque</snm><fnm>V</fnm></au><au><snm>Pouillot</snm><fnm>F</fnm></au><au><snm>Fayolle</snm><fnm>C</fnm></au><au><snm>Pr&#233;vost</snm><fnm>MC</fnm></au><au><snm>M&#233;digue</snm><fnm>C</fnm></au><au><snm>Hinnebusch</snm><fnm>BJ</fnm></au><au><snm>Carniel</snm><fnm>E</fnm></au></aug><source>Mol Microbiol</source><pubdate>2007</pubdate><volume>63</volume><fpage>1145</fpage><lpage>1157</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1111/j.1365-2958.2006.05570.x</pubid><pubid idtype="pmpid" link="fulltext">17238929</pubid></pubidlist></xrefbib></bibl><bibl id="B28"><title><p>Yersiniae other than Y. enterocolitica, Y. pseudotuberculosis, and Y. pestis: the ignored species.</p></title><aug><au><snm>Sulakvelidze</snm><fnm>A</fnm></au></aug><source>Microbes Infect</source><pubdate>2000</pubdate><volume>2</volume><fpage>497</fpage><lpage>513</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1016/S1286-4579(00)00311-7</pubid><pubid idtype="pmpid" link="fulltext">10865195</pubid></pubidlist></xrefbib></bibl><bibl id="B29"><title><p>Genus XLI. 
Yersinia Van Loghem 1944, 15AL.</p></title><aug><au><snm>Bottone</snm><fnm>EJ</fnm></au><au><snm>Bercovier</snm><fnm>H</fnm></au><au><snm>Mollaret</snm><fnm>HH</fnm></au></aug><source>Bergey's Manual of Systematic Bacteriology</source><pubdate>2005</pubdate><volume>2</volume><fpage>838</fpage><lpage>846</lpage></bibl><bibl id="B30"><title><p>Multilocus sequence typing for studying genetic relationships among Yersinia species.</p></title><aug><au><snm>Kotetishvili</snm><fnm>M</fnm></au><au><snm>Kreger</snm><fnm>A</fnm></au><au><snm>Wauters</snm><fnm>G</fnm></au><au><snm>Morris</snm><fnm>JG</fnm><suf>Jr</suf></au><au><snm>Sulakvelidze</snm><fnm>A</fnm></au><au><snm>Stine</snm><fnm>OC</fnm></au></aug><source>J Clin Microbiol</source><pubdate>2005</pubdate><volume>43</volume><fpage>2674</fpage><lpage>2684</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1128/JCM.43.6.2674-2684.2005</pubid><pubid idtype="pmcid">1151872</pubid><pubid idtype="pmpid" link="fulltext">15956383</pubid></pubidlist></xrefbib></bibl><bibl id="B31"><title><p>Clinical significance of virulence-related assay of Yersinia species.</p></title><aug><au><snm>Noble</snm><fnm>MA</fnm></au><au><snm>Barteluk</snm><fnm>RL</fnm></au><au><snm>Freeman</snm><fnm>HJ</fnm></au><au><snm>Subramaniam</snm><fnm>R</fnm></au><au><snm>Hudson</snm><fnm>JB</fnm></au></aug><source>J Clin Microbiol</source><pubdate>1987</pubdate><volume>25</volume><fpage>802</fpage><lpage>807</lpage><xrefbib><pubidlist><pubid idtype="pmcid">266092</pubid><pubid idtype="pmpid" link="fulltext">3584418</pubid></pubidlist></xrefbib></bibl><bibl id="B32"><title><p>Pathogenicity of Yersinia kristensenii for mice.</p></title><aug><au><snm>Robins-Browne</snm><fnm>RM</fnm></au><au><snm>Cianciosi</snm><fnm>S</fnm></au><au><snm>Bordun</snm><fnm>AM</fnm></au><au><snm>Wauters</snm><fnm>G</fnm></au></aug><source>Infect Immun</source><pubdate>1991</pubdate><volume>59</volume><fpage>162</fpage><lpage>167</lpage><xrefbib><pubidlist><pubid 
idtype="pmcid">257721</pubid><pubid idtype="pmpid" link="fulltext">1987029</pubid></pubidlist></xrefbib></bibl><bibl id="B33"><title><p>Mice and moles inhabiting mountainous areas of Shimane Peninsula as sources of infection with Yersinia pseudotuberculosis.</p></title><aug><au><snm>Fukushima</snm><fnm>H</fnm></au><au><snm>Gomyoda</snm><fnm>M</fnm></au><au><snm>Kaneko</snm><fnm>S</fnm></au></aug><source>J Clin Microbiol</source><pubdate>1990</pubdate><volume>28</volume><fpage>2448</fpage><lpage>2455</lpage><xrefbib><pubidlist><pubid idtype="pmcid">268204</pubid><pubid idtype="pmpid" link="fulltext">2254420</pubid></pubidlist></xrefbib></bibl><bibl id="B34"><title><p>Genome sequencing in microfabricated high-density picolitre reactors.</p></title><aug><au><snm>Margulies</snm><fnm>M</fnm></au><au><snm>Egholm</snm><fnm>M</fnm></au><au><snm>Altman</snm><fnm>WE</fnm></au><au><snm>Attiya</snm><fnm>S</fnm></au><au><snm>Bader</snm><fnm>JS</fnm></au><au><snm>Bemben</snm><fnm>LA</fnm></au><au><snm>Berka</snm><fnm>J</fnm></au><au><snm>Braverman</snm><fnm>MS</fnm></au><au><snm>Chen</snm><fnm>YJ</fnm></au><au><snm>Chen</snm><fnm>Z</fnm></au><au><snm>Dewell</snm><fnm>SB</fnm></au><au><snm>Du</snm><fnm>L</fnm></au><au><snm>Fierro</snm><fnm>JM</fnm></au><au><snm>Gomes</snm><fnm>XV</fnm></au><au><snm>Godwin</snm><fnm>BC</fnm></au><au><snm>He</snm><fnm>W</fnm></au><au><snm>Helgesen</snm><fnm>S</fnm></au><au><snm>Ho</snm><fnm>CH</fnm></au><au><snm>Ho</snm><fnm>CH</fnm></au><au><snm>Irzyk</snm><fnm>GP</fnm></au><au><snm>Jando</snm><fnm>SC</fnm></au><au><snm>Alenquer</snm><fnm>ML</fnm></au><au><snm>Jarvie</snm><fnm>TP</fnm></au><au><snm>Jirage</snm><fnm>KB</fnm></au><au><snm>Kim</snm><fnm>JB</fnm></au><au><snm>Knight</snm><fnm>JR</fnm></au><au><snm>Lanza</snm><fnm>JR</fnm></au><au><snm>Leamon</snm><fnm>JH</fnm></au><au><snm>Lefkowitz</snm><fnm>SM</fnm></au><au><snm>Lei</snm><fnm>M</fnm></au><etal/></aug><source>Nature</source><pubdate>2005</pubdate><volume>437</volume><fpage>376</fpage>
<lpage>380</lpage><xrefbib><pubidlist><pubid idtype="pmcid">1464427</pubid><pubid idtype="pmpid" link="fulltext">16056220</pubid></pubidlist></xrefbib></bibl><bibl id="B35"><title><p>Base-calling of automated sequencer traces using phred. II. Error probabilities.</p></title><aug><au><snm>Ewing</snm><fnm>B</fnm></au><au><snm>Green</snm><fnm>P</fnm></au></aug><source>Genome Res</source><pubdate>1998</pubdate><volume>8</volume><fpage>186</fpage><lpage>194</lpage><xrefbib><pubid idtype="pmpid" link="fulltext">9521922</pubid></xrefbib></bibl><bibl id="B36"><title><p>Quality scores and SNP detection in sequencing-by-synthesis systems.</p></title><aug><au><snm>Brockman</snm><fnm>W</fnm></au><au><snm>Alvarez</snm><fnm>P</fnm></au><au><snm>Young</snm><fnm>S</fnm></au><au><snm>Garber</snm><fnm>M</fnm></au><au><snm>Giannoukos</snm><fnm>G</fnm></au><au><snm>Lee</snm><fnm>WL</fnm></au><au><snm>Russ</snm><fnm>C</fnm></au><au><snm>Lander</snm><fnm>ES</fnm></au><au><snm>Nusbaum</snm><fnm>C</fnm></au><au><snm>Jaffe</snm><fnm>DB</fnm></au></aug><source>Genome Res</source><pubdate>2008</pubdate><volume>18</volume><fpage>763</fpage><lpage>770</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1101/gr.070227.107</pubid><pubid idtype="pmcid">2336812</pubid><pubid idtype="pmpid" link="fulltext">18212088</pubid></pubidlist></xrefbib></bibl><bibl id="B37"><title><p>Genome assembly forensics: finding the elusive mis-assembly.</p></title><aug><au><snm>Phillippy</snm><fnm>AM</fnm></au><au><snm>Schatz</snm><fnm>MC</fnm></au><au><snm>Pop</snm><fnm>M</fnm></au></aug><source>Genome Biol</source><pubdate>2008</pubdate><volume>9</volume><fpage>R55</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1186/gb-2008-9-3-r55</pubid><pubid idtype="pmcid">2397507</pubid><pubid idtype="pmpid" link="fulltext">18341692</pubid></pubidlist></xrefbib></bibl><bibl id="B38"><title><p>Mapping the genome one molecule at a time--optical 
mapping.</p></title><aug><au><snm>Samad</snm><fnm>AH</fnm></au><au><snm>Cai</snm><fnm>WW</fnm></au><au><snm>Hu</snm><fnm>X</fnm></au><au><snm>Irvin</snm><fnm>B</fnm></au><au><snm>Jing</snm><fnm>J</fnm></au><au><snm>Reed</snm><fnm>J</fnm></au><au><snm>Meng</snm><fnm>X</fnm></au><au><snm>Huang</snm><fnm>J</fnm></au><au><snm>Huff</snm><fnm>E</fnm></au><au><snm>Porter</snm><fnm>B</fnm></au></aug><source>Nature</source><pubdate>1995</pubdate><volume>378</volume><fpage>516</fpage><lpage>517</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1038/378516a0</pubid><pubid idtype="pmpid" link="fulltext">7477412</pubid></pubidlist></xrefbib></bibl><bibl id="B39"><title><p>Scaffolding and validation of bacterial genome assemblies using optical restriction maps.</p></title><aug><au><snm>Nagarajan</snm><fnm>N</fnm></au><au><snm>Read</snm><fnm>TD</fnm></au><au><snm>Pop</snm><fnm>M</fnm></au></aug><source>Bioinformatics</source><pubdate>2008</pubdate><volume>24</volume><fpage>1229</fpage><lpage>35</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/bioinformatics/btn102</pubid><pubid idtype="pmcid">2373919</pubid><pubid idtype="pmpid" link="fulltext">18356192</pubid></pubidlist></xrefbib></bibl><bibl id="B40"><title><p>ISfinder: the reference centre for bacterial insertion sequences.</p></title><aug><au><snm>Siguier</snm><fnm>P</fnm></au><au><snm>Perochon</snm><fnm>J</fnm></au><au><snm>Lestrade</snm><fnm>L</fnm></au><au><snm>Mahillon</snm><fnm>J</fnm></au><au><snm>Chandler</snm><fnm>M</fnm></au></aug><source>Nucleic Acids Res</source><pubdate>2006</pubdate><volume>34</volume><fpage>D32</fpage><lpage>36</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/nar/gkj014</pubid><pubid idtype="pmcid">1347377</pubid><pubid idtype="pmpid" link="fulltext">16381877</pubid></pubidlist></xrefbib></bibl><bibl id="B41"><title><p>De novo identification of repeat families in large 
genomes.</p></title><aug><au><snm>Price</snm><fnm>AL</fnm></au><au><snm>Jones</snm><fnm>NC</fnm></au><au><snm>Pevzner</snm><fnm>PA</fnm></au></aug><source>Bioinformatics</source><pubdate>2005</pubdate><volume>21</volume><issue>Suppl 1</issue><fpage>i351</fpage><lpage>358</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/bioinformatics/bti1018</pubid><pubid idtype="pmpid" link="fulltext">15961478</pubid></pubidlist></xrefbib></bibl><bibl id="B42"><title><p>ERIC sequences: a novel family of repetitive elements in the genomes of Escherichia coli, Salmonella typhimurium and other enterobacteria.</p></title><aug><au><snm>Hulton</snm><fnm>CS</fnm></au><au><snm>Higgins</snm><fnm>CF</fnm></au><au><snm>Sharp</snm><fnm>PM</fnm></au></aug><source>Mol Microbiol</source><pubdate>1991</pubdate><volume>5</volume><fpage>825</fpage><lpage>834</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1111/j.1365-2958.1991.tb00755.x</pubid><pubid idtype="pmpid">1713281</pubid></pubidlist></xrefbib></bibl><bibl id="B43"><title><p>Structural organization and functional properties of miniature DNA insertion sequences in yersiniae.</p></title><aug><au><snm>De Gregorio</snm><fnm>E</fnm></au><au><snm>Silvestro</snm><fnm>G</fnm></au><au><snm>Venditti</snm><fnm>R</fnm></au><au><snm>Carlomagno</snm><fnm>MS</fnm></au><au><snm>Di Nocera</snm><fnm>PP</fnm></au></aug><source>J Bacteriol</source><pubdate>2006</pubdate><volume>188</volume><fpage>7876</fpage><lpage>7884</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1128/JB.00942-06</pubid><pubid idtype="pmcid">1636318</pubid><pubid idtype="pmpid" link="fulltext">16963573</pubid></pubidlist></xrefbib></bibl><bibl id="B44"><title><p>Comprehensive DNA signature discovery and 
validation.</p></title><aug><au><snm>Phillippy</snm><fnm>AM</fnm></au><au><snm>Mason</snm><fnm>JA</fnm></au><au><snm>Ayanbule</snm><fnm>K</fnm></au><au><snm>Sommer</snm><fnm>DD</fnm></au><au><snm>Taviani</snm><fnm>E</fnm></au><au><snm>Huq</snm><fnm>A</fnm></au><au><snm>Colwell</snm><fnm>RR</fnm></au><au><snm>Knight</snm><fnm>IT</fnm></au><au><snm>Salzberg</snm><fnm>SL</fnm></au></aug><source>PLoS Comput Biol</source><pubdate>2007</pubdate><volume>3</volume><fpage>e98</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1371/journal.pcbi.0030098</pubid><pubid idtype="pmcid">1868776</pubid><pubid idtype="pmpid" link="fulltext">17511514</pubid></pubidlist></xrefbib></bibl><bibl id="B45"><title><p>IslandViewer: an integrated interface for computational identification and visualization of genomic islands.</p></title><aug><au><snm>Langille</snm><fnm>MG</fnm></au><au><snm>Brinkman</snm><fnm>FS</fnm></au></aug><source>Bioinformatics</source><pubdate>2009</pubdate><volume>25</volume><fpage>664</fpage><lpage>665</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/bioinformatics/btp030</pubid><pubid idtype="pmcid">2647836</pubid><pubid idtype="pmpid" link="fulltext">19151094</pubid></pubidlist></xrefbib></bibl><bibl id="B46"><title><p>Mauve: multiple alignment of conserved genomic sequence with rearrangements.</p></title><aug><au><snm>Darling</snm><fnm>AC</fnm></au><au><snm>Mau</snm><fnm>B</fnm></au><au><snm>Blattner</snm><fnm>FR</fnm></au><au><snm>Perna</snm><fnm>NT</fnm></au></aug><source>Genome Res</source><pubdate>2004</pubdate><volume>14</volume><fpage>1394</fpage><lpage>1403</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1101/gr.2289704</pubid><pubid idtype="pmcid">442156</pubid><pubid idtype="pmpid" link="fulltext">15231754</pubid></pubidlist></xrefbib></bibl><bibl id="B47"><title><p>MAUVE Aligner User Guide</p></title><url>http://asap.ahabs.wisc.edu/mauve-aligner/mauve-user-guide/</url></bibl><bibl id="B48"><title><p>An efficient algorithm for large-scale detection 
of protein families.</p></title><aug><au><snm>Enright</snm><fnm>AJ</fnm></au><au><snm>Van Dongen</snm><fnm>S</fnm></au><au><snm>Ouzounis</snm><fnm>CA</fnm></au></aug><source>Nucleic Acids Res</source><pubdate>2002</pubdate><volume>30</volume><fpage>1575</fpage><lpage>1584</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/nar/30.7.1575</pubid><pubid idtype="pmcid">101833</pubid><pubid idtype="pmpid" link="fulltext">11917018</pubid></pubidlist></xrefbib></bibl><bibl id="B49"><title><p>Genome analysis of multiple pathogenic isolates of Streptococcus agalactiae: implications for the microbial "pan-genome".</p></title><aug><au><snm>Tettelin</snm><fnm>H</fnm></au><au><snm>Masignani</snm><fnm>V</fnm></au><au><snm>Cieslewicz</snm><fnm>MJ</fnm></au><au><snm>Donati</snm><fnm>C</fnm></au><au><snm>Medini</snm><fnm>D</fnm></au><au><snm>Ward</snm><fnm>NL</fnm></au><au><snm>Angiuoli</snm><fnm>SV</fnm></au><au><snm>Crabtree</snm><fnm>J</fnm></au><au><snm>Jones</snm><fnm>AL</fnm></au><au><snm>Durkin</snm><fnm>AS</fnm></au><au><snm>Deboy</snm><fnm>RT</fnm></au><au><snm>Davidsen</snm><fnm>TM</fnm></au><au><snm>Mora</snm><fnm>M</fnm></au><au><snm>Scarselli</snm><fnm>M</fnm></au><au><snm>Margarit y Ros</snm><fnm>I</fnm></au><au><snm>Peterson</snm><fnm>JD</fnm></au><au><snm>Hauser</snm><fnm>CR</fnm></au><au><snm>Sundaram</snm><fnm>JP</fnm></au><au><snm>Nelson</snm><fnm>WC</fnm></au><au><snm>Madupu</snm><fnm>R</fnm></au><au><snm>Brinkac</snm><fnm>LM</fnm></au><au><snm>Dodson</snm><fnm>RJ</fnm></au><au><snm>Rosovitz</snm><fnm>MJ</fnm></au><au><snm>Sullivan</snm><fnm>SA</fnm></au><au><snm>Daugherty</snm><fnm>SC</fnm></au><au><snm>Haft</snm><fnm>DH</fnm></au><au><snm>Selengut</snm><fnm>J</fnm></au><au><snm>Gwinn</snm><fnm>ML</fnm></au><au><snm>Zhou</snm><fnm>L</fnm></au><au><snm>Zafar</snm><fnm>N</fnm></au><etal/></aug><source>Proc Natl Acad Sci USA</source><pubdate>2005</pubdate><volume>102</volume><fpage>13950</fpage><lpage>13955</lpage><xrefbib><pubidlist><pubid 
idtype="doi">10.1073/pnas.0506758102</pubid><pubid idtype="pmcid">1216834</pubid><pubid idtype="pmpid" link="fulltext">16172379</pubid></pubidlist></xrefbib></bibl><bibl id="B50"><title><p>Clustal W and Clustal X version 2.0.</p></title><aug><au><snm>Larkin</snm><fnm>MA</fnm></au><au><snm>Blackshields</snm><fnm>G</fnm></au><au><snm>Brown</snm><fnm>NP</fnm></au><au><snm>Chenna</snm><fnm>R</fnm></au><au><snm>McGettigan</snm><fnm>PA</fnm></au><au><snm>McWilliam</snm><fnm>H</fnm></au><au><snm>Valentin</snm><fnm>F</fnm></au><au><snm>Wallace</snm><fnm>IM</fnm></au><au><snm>Wilm</snm><fnm>A</fnm></au><au><snm>Lopez</snm><fnm>R</fnm></au><au><snm>Thompson</snm><fnm>JD</fnm></au><au><snm>Gibson</snm><fnm>TJ</fnm></au><au><snm>Higgins</snm><fnm>DG</fnm></au></aug><source>Bioinformatics</source><pubdate>2007</pubdate><volume>23</volume><fpage>2947</fpage><lpage>2948</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/bioinformatics/btm404</pubid><pubid idtype="pmpid" link="fulltext">17846036</pubid></pubidlist></xrefbib></bibl><bibl id="B51"><title><p>PHYLIP: Phylogeny Inference Package, version 3.6.</p></title><aug><au><snm>Felsenstein</snm><fnm>J</fnm></au></aug><publisher>Seattle, WA, USA.: University of Washington</publisher><pubdate>2001</pubdate></bibl><bibl id="B52"><title><p>The COG database: a tool for genome-scale analysis of protein functions and evolution.</p></title><aug><au><snm>Tatusov</snm><fnm>RL</fnm></au><au><snm>Galperin</snm><fnm>MY</fnm></au><au><snm>Natale</snm><fnm>DA</fnm></au><au><snm>Koonin</snm><fnm>EV</fnm></au></aug><source>Nucleic Acids Res</source><pubdate>2000</pubdate><volume>28</volume><fpage>33</fpage><lpage>36</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/nar/28.1.33</pubid><pubid idtype="pmcid">102395</pubid><pubid idtype="pmpid" link="fulltext">10592175</pubid></pubidlist></xrefbib></bibl><bibl id="B53"><title><p>Enhancin, the granulosis virus protein that facilitates nucleopolyhedrovirus (NPV) infections, is a 
metalloprotease.</p></title><aug><au><snm>Lepore</snm><fnm>LS</fnm></au><au><snm>Roelvink</snm><fnm>PR</fnm></au><au><snm>Granados</snm><fnm>RR</fnm></au></aug><source>J Invertebr Pathol</source><pubdate>1996</pubdate><volume>68</volume><fpage>131</fpage><lpage>140</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1006/jipa.1996.0070</pubid><pubid idtype="pmpid" link="fulltext">8858909</pubid></pubidlist></xrefbib></bibl><bibl id="B54"><title><p>Insecticidal toxins from the bacterium Photorhabdus luminescens.</p></title><aug><au><snm>Bowen</snm><fnm>D</fnm></au><au><snm>Rocheleau</snm><fnm>TA</fnm></au><au><snm>Blackburn</snm><fnm>M</fnm></au><au><snm>Andreev</snm><fnm>O</fnm></au><au><snm>Golubeva</snm><fnm>E</fnm></au><au><snm>Bhartia</snm><fnm>R</fnm></au><au><snm>ffrench-Constant</snm><fnm>RH</fnm></au></aug><source>Science</source><pubdate>1998</pubdate><volume>280</volume><fpage>2129</fpage><lpage>2132</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1126/science.280.5372.2129</pubid><pubid idtype="pmpid" link="fulltext">9641921</pubid></pubidlist></xrefbib></bibl><bibl id="B55"><title><p>Phages and the evolution of bacterial pathogens: from genomic rearrangements to lysogenic conversion.</p></title><aug><au><snm>Brussow</snm><fnm>H</fnm></au><au><snm>Canchaya</snm><fnm>C</fnm></au><au><snm>Hardt</snm><fnm>WD</fnm></au></aug><source>Microbiol Mol Biol Rev</source><pubdate>2004</pubdate><volume>68</volume><fpage>560</fpage><lpage>602</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1128/MMBR.68.3.560-602.2004</pubid><pubid idtype="pmcid">515249</pubid><pubid idtype="pmpid" link="fulltext">15353570</pubid></pubidlist></xrefbib></bibl><bibl id="B56"><title><p>Describing ancient horizontal gene transfers at the nucleotide and gene levels by comparative pathogenicity island 
genometrics.</p></title><aug><au><snm>Collyn</snm><fnm>F</fnm></au><au><snm>Guy</snm><fnm>L</fnm></au><au><snm>Marceau</snm><fnm>M</fnm></au><au><snm>Simonet</snm><fnm>M</fnm></au><au><snm>Roten</snm><fnm>CA</fnm></au></aug><source>Bioinformatics</source><pubdate>2006</pubdate><volume>22</volume><fpage>1072</fpage><lpage>1079</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/bioinformatics/bti793</pubid><pubid idtype="pmpid" link="fulltext">16303795</pubid></pubidlist></xrefbib></bibl><bibl id="B57"><title><p>YAPI, a new Yersinia pseudotuberculosis pathogenicity island.</p></title><aug><au><snm>Collyn</snm><fnm>F</fnm></au><au><snm>Billault</snm><fnm>A</fnm></au><au><snm>Mullet</snm><fnm>C</fnm></au><au><snm>Simonet</snm><fnm>M</fnm></au><au><snm>Marceau</snm><fnm>M</fnm></au></aug><source>Infect Immun</source><pubdate>2004</pubdate><volume>72</volume><fpage>4784</fpage><lpage>4790</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1128/IAI.72.8.4784-4790.2004</pubid><pubid idtype="pmcid">470613</pubid><pubid idtype="pmpid" link="fulltext">15271940</pubid></pubidlist></xrefbib></bibl><bibl id="B58"><title><p>Application of comparative phylogenomics to study the evolution of Yersinia enterocolitica and to identify genetic differences relating to pathogenicity.</p></title><aug><au><snm>Howard</snm><fnm>SL</fnm></au><au><snm>Gaunt</snm><fnm>MW</fnm></au><au><snm>Hinds</snm><fnm>J</fnm></au><au><snm>Witney</snm><fnm>AA</fnm></au><au><snm>Stabler</snm><fnm>R</fnm></au><au><snm>Wren</snm><fnm>BW</fnm></au></aug><source>J Bacteriol</source><pubdate>2006</pubdate><volume>188</volume><fpage>3645</fpage><lpage>3653</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1128/JB.188.10.3645-3653.2006</pubid><pubid idtype="pmcid">1482848</pubid><pubid idtype="pmpid" link="fulltext">16672618</pubid></pubidlist></xrefbib></bibl><bibl id="B59"><title><p>A chromosomally encoded type III secretion pathway in Yersinia enterocolitica is important in 
virulence.</p></title><aug><au><snm>Haller</snm><fnm>JC</fnm></au><au><snm>Carlson</snm><fnm>S</fnm></au><au><snm>Pederson</snm><fnm>KJ</fnm></au><au><snm>Pierson</snm><fnm>DE</fnm></au></aug><source>Mol Microbiol</source><pubdate>2000</pubdate><volume>36</volume><fpage>1436</fpage><lpage>1446</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1046/j.1365-2958.2000.01964.x</pubid><pubid idtype="pmpid" link="fulltext">10931293</pubid></pubidlist></xrefbib></bibl><bibl id="B60"><title><p>Analysis of the boundaries of Salmonella pathogenicity island 2 and the corresponding chromosomal region of Escherichia coli K-12.</p></title><aug><au><snm>Hensel</snm><fnm>M</fnm></au><au><snm>Shea</snm><fnm>JE</fnm></au><au><snm>Baumler</snm><fnm>AJ</fnm></au><au><snm>Gleeson</snm><fnm>C</fnm></au><au><snm>Blattner</snm><fnm>F</fnm></au><au><snm>Holden</snm><fnm>DW</fnm></au></aug><source>J Bacteriol</source><pubdate>1997</pubdate><volume>179</volume><fpage>1105</fpage><lpage>1111</lpage><xrefbib><pubidlist><pubid idtype="pmcid">178805</pubid><pubid idtype="pmpid" link="fulltext">9023191</pubid></pubidlist></xrefbib></bibl><bibl id="B61"><title><p>Identification of a virulence locus encoding a second type III secretion system in Salmonella typhimurium.</p></title><aug><au><snm>Shea</snm><fnm>JE</fnm></au><au><snm>Hensel</snm><fnm>M</fnm></au><au><snm>Gleeson</snm><fnm>C</fnm></au><au><snm>Holden</snm><fnm>DW</fnm></au></aug><source>Proc Natl Acad Sci USA</source><pubdate>1996</pubdate><volume>93</volume><fpage>2593</fpage><lpage>2597</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1073/pnas.93.6.2593</pubid><pubid idtype="pmcid">39842</pubid><pubid idtype="pmpid">8637919</pubid></pubidlist></xrefbib></bibl><bibl id="B62"><title><p>Comparative genome analyses of the pathogenic Yersiniae based on the genome sequence of Yersinia enterocolitica strain 
8081.</p></title><aug><au><snm>Thomson</snm><fnm>NR</fnm></au><au><snm>Howard</snm><fnm>S</fnm></au><au><snm>Wren</snm><fnm>BW</fnm></au><au><snm>Prentice</snm><fnm>MB</fnm></au></aug><source>Adv Exp Med Biol</source><pubdate>2007</pubdate><volume>603</volume><fpage>2</fpage><lpage>16</lpage><xrefbib><pubidlist><pubid idtype="doi">full_text</pubid><pubid idtype="pmpid" link="fulltext">17966400</pubid></pubidlist></xrefbib></bibl><bibl id="B63"><title><p>Cobalamin synthesis in Yersinia enterocolitica 8081. Functional aspects of a putative metabolic island.</p></title><aug><au><snm>Prentice</snm><fnm>MB</fnm></au><au><snm>Cuccui</snm><fnm>J</fnm></au><au><snm>Thomson</snm><fnm>N</fnm></au><au><snm>Parkhill</snm><fnm>J</fnm></au><au><snm>Deery</snm><fnm>E</fnm></au><au><snm>Warren</snm><fnm>MJ</fnm></au></aug><source>Adv Exp Med Biol</source><pubdate>2003</pubdate><volume>529</volume><fpage>43</fpage><lpage>46</lpage><xrefbib><pubidlist><pubid idtype="doi">full_text</pubid><pubid idtype="pmpid" link="fulltext">12756726</pubid></pubidlist></xrefbib></bibl><bibl id="B64"><title><p>Cobalamin (coenzyme B12): synthesis and biological significance.</p></title><aug><au><snm>Roth</snm><fnm>JR</fnm></au><au><snm>Lawrence</snm><fnm>JG</fnm></au><au><snm>Bobik</snm><fnm>TA</fnm></au></aug><source>Annu Rev Microbiol</source><pubdate>1996</pubdate><volume>50</volume><fpage>137</fpage><lpage>181</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1146/annurev.micro.50.1.137</pubid><pubid idtype="pmpid" link="fulltext">8905078</pubid></pubidlist></xrefbib></bibl><bibl id="B65"><title><p>The 17-gene ethanolamine (eut) operon of Salmonella typhimurium encodes five homologues of carboxysome shell proteins.</p></title><aug><au><snm>Kofoid</snm><fnm>E</fnm></au><au><snm>Rappleye</snm><fnm>C</fnm></au><au><snm>Stojiljkovic</snm><fnm>I</fnm></au><au><snm>Roth</snm><fnm>J</fnm></au></aug><source>J 
Bacteriol</source><pubdate>1999</pubdate><volume>181</volume><fpage>5317</fpage><lpage>5329</lpage><xrefbib><pubidlist><pubid idtype="pmcid">94038</pubid><pubid idtype="pmpid" link="fulltext">10464203</pubid></pubidlist></xrefbib></bibl><bibl id="B66"><title><p>Use of molecular hydrogen as an energy substrate by human pathogenic bacteria.</p></title><aug><au><snm>Maier</snm><fnm>RJ</fnm></au></aug><source>Biochem Soc Trans</source><pubdate>2005</pubdate><volume>33</volume><fpage>83</fpage><lpage>85</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1042/BST0330083</pubid><pubid idtype="pmpid" link="fulltext">15667272</pubid></pubidlist></xrefbib></bibl><bibl id="B67"><title><p>Yersinia ruckeri sp. nov., the Redmouth (RM) Bacterium.</p></title><aug><au><snm>Ewing</snm><fnm>WH</fnm></au><au><snm>Ross</snm><fnm>AJ</fnm></au><au><snm>Brenner</snm><fnm>DJ</fnm></au><au><snm>Fanning</snm><fnm>GR</fnm></au></aug><source>Int J Syst Bacteriol</source><pubdate>1978</pubdate><volume>28</volume><fpage>37</fpage><lpage>44</lpage></bibl><bibl id="B68"><title><p>Bacterial variations on the methionine salvage pathway.</p></title><aug><au><snm>Sekowska</snm><fnm>A</fnm></au><au><snm>D&#233;nervaud</snm><fnm>V</fnm></au><au><snm>Ashida</snm><fnm>H</fnm></au><au><snm>Michoud</snm><fnm>K</fnm></au><au><snm>Haas</snm><fnm>D</fnm></au><au><snm>Yokota</snm><fnm>A</fnm></au><au><snm>Danchin</snm><fnm>A</fnm></au></aug><source>BMC Microbiol</source><pubdate>2004</pubdate><volume>4</volume><fpage>9</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1186/1471-2180-4-9</pubid><pubid idtype="pmcid">395828</pubid><pubid idtype="pmpid" link="fulltext">15102328</pubid></pubidlist></xrefbib></bibl><bibl id="B69"><title><p>Comparative Genomic Analyses of Seventeen Streptococcus pneumoniae Strains: Insights into the Pneumococcal 
Supragenome.</p></title><aug><au><snm>Hiller</snm><fnm>NL</fnm></au><au><snm>Janto</snm><fnm>B</fnm></au><au><snm>Hogg</snm><fnm>JS</fnm></au><au><snm>Boissy</snm><fnm>R</fnm></au><au><snm>Yu</snm><fnm>S</fnm></au><au><snm>Powell</snm><fnm>E</fnm></au><au><snm>Keefe</snm><fnm>R</fnm></au><au><snm>Ehrlich</snm><fnm>NE</fnm></au><au><snm>Shen</snm><fnm>K</fnm></au><au><snm>Hayes</snm><fnm>J</fnm></au><au><snm>Barbadora</snm><fnm>K</fnm></au><au><snm>Klimke</snm><fnm>W</fnm></au><au><snm>Dernovoy</snm><fnm>D</fnm></au><au><snm>Tatusova</snm><fnm>T</fnm></au><au><snm>Parkhill</snm><fnm>J</fnm></au><au><snm>Bentley</snm><fnm>SD</fnm></au><au><snm>Post</snm><fnm>JC</fnm></au><au><snm>Ehrlich</snm><fnm>GD</fnm></au><au><snm>Hu</snm><fnm>FZ</fnm></au></aug><source>J Bacteriol</source><pubdate>2007</pubdate><volume>189</volume><fpage>8186</fpage><lpage>95</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1128/JB.00690-07</pubid><pubid idtype="pmcid">2168654</pubid><pubid idtype="pmpid" link="fulltext">17675389</pubid></pubidlist></xrefbib></bibl><bibl id="B70"><title><p>Characterization and modeling of the Haemophilus influenzae core and supragenomes based on the complete genomic sequences of Rd and 12 clinical nontypeable strains.</p></title><aug><au><snm>Hogg</snm><fnm>JS</fnm></au><au><snm>Hu</snm><fnm>FZ</fnm></au><au><snm>Janto</snm><fnm>B</fnm></au><au><snm>Boissy</snm><fnm>R</fnm></au><au><snm>Hayes</snm><fnm>J</fnm></au><au><snm>Keefe</snm><fnm>R</fnm></au><au><snm>Post</snm><fnm>JC</fnm></au><au><snm>Ehrlich</snm><fnm>GD</fnm></au></aug><source>Genome Biol</source><pubdate>2007</pubdate><volume>8</volume><fpage>R103</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1186/gb-2007-8-6-r103</pubid><pubid idtype="pmcid">2394751</pubid><pubid idtype="pmpid" link="fulltext">17550610</pubid></pubidlist></xrefbib></bibl><bibl id="B71"><title><p>Dynamics of Pseudomonas aeruginosa genome 
evolution.</p></title><aug><au><snm>Mathee</snm><fnm>K</fnm></au><au><snm>Narasimhan</snm><fnm>G</fnm></au><au><snm>Valdes</snm><fnm>C</fnm></au><au><snm>Qiu</snm><fnm>X</fnm></au><au><snm>Matewish</snm><fnm>JM</fnm></au><au><snm>Koehrsen</snm><fnm>M</fnm></au><au><snm>Rokas</snm><fnm>A</fnm></au><au><snm>Yandava</snm><fnm>CN</fnm></au><au><snm>Engels</snm><fnm>R</fnm></au><au><snm>Zeng</snm><fnm>E</fnm></au><au><snm>Olavarietta</snm><fnm>R</fnm></au><au><snm>Doud</snm><fnm>M</fnm></au><au><snm>Smith</snm><fnm>RS</fnm></au><au><snm>Montgomery</snm><fnm>P</fnm></au><au><snm>White</snm><fnm>JR</fnm></au><au><snm>Godfrey</snm><fnm>PA</fnm></au><au><snm>Kodira</snm><fnm>C</fnm></au><au><snm>Birren</snm><fnm>B</fnm></au><au><snm>Galagan</snm><fnm>JE</fnm></au><au><snm>Lory</snm><fnm>S</fnm></au></aug><source>Proc Natl Acad Sci USA</source><pubdate>2008</pubdate><volume>105</volume><fpage>3100</fpage><lpage>3105</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1073/pnas.0711982105</pubid><pubid idtype="pmcid">2268591</pubid><pubid idtype="pmpid" link="fulltext">18287045</pubid></pubidlist></xrefbib></bibl><bibl id="B72"><title><p>High-throughput sequencing provides insights into genome variation and evolution in Salmonella Typhi.</p></title><aug><au><snm>Holt</snm><fnm>K</fnm></au><au><snm>Parkhill</snm><fnm>J</fnm></au><au><snm>Mazzoni</snm><fnm>C</fnm></au><au><snm>Roumagnac</snm><fnm>P</fnm></au><au><snm>Weill</snm><fnm>F</fnm></au><au><snm>Goodhead</snm><fnm>I</fnm></au><au><snm>Rance</snm><fnm>R</fnm></au><au><snm>Baker</snm><fnm>S</fnm></au><au><snm>Maskell</snm><fnm>D</fnm></au><au><snm>Wain</snm><fnm>J</fnm></au><au><snm>Dolecek</snm><fnm>C</fnm></au><au><snm>Achtman</snm><fnm>M</fnm></au><au><snm>Dougan</snm><fnm>G</fnm></au></aug><source>Nat Genet</source><pubdate>2008</pubdate><volume>40</volume><fpage>987</fpage><lpage>93</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1038/ng.195</pubid><pubid idtype="pmcid">2652037</pubid><pubid idtype="pmpid" 
link="fulltext">18660809</pubid></pubidlist></xrefbib></bibl><bibl id="B73"><title><p>Population Genomic Analysis of Strain Variation in Leptospirillum Group II Bacteria Involved in Acid Mine Drainage Formation.</p></title><aug><au><snm>Simmons</snm><fnm>S</fnm></au><au><snm>Dibartolo</snm><fnm>G</fnm></au><au><snm>Denef</snm><fnm>V</fnm></au><au><snm>Goltsman</snm><fnm>D</fnm></au><au><snm>Thelen</snm><fnm>M</fnm></au><au><snm>Banfield</snm><fnm>J</fnm></au><au><snm>Eisen</snm><fnm>J</fnm></au></aug><source>Plos Biol</source><pubdate>2008</pubdate><volume>6</volume><fpage>e177</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1371/journal.pbio.0060177</pubid><pubid idtype="pmcid">2475542</pubid><pubid idtype="pmpid" link="fulltext">18651792</pubid></pubidlist></xrefbib></bibl><bibl id="B74"><title><p>The pan-genome structure of Escherichia coli: comparative genomic analysis of E. coli commensal and pathogenic isolates.</p></title><aug><au><snm>Rasko</snm><fnm>D</fnm></au><au><snm>Rosovitz</snm><fnm>M</fnm></au><au><snm>Myers</snm><fnm>G</fnm></au><au><snm>Mongodin</snm><fnm>E</fnm></au><au><snm>Fricke</snm><fnm>W</fnm></au><au><snm>Gajer</snm><fnm>P</fnm></au><au><snm>Crabtree</snm><fnm>J</fnm></au><au><snm>Sperandio</snm><fnm>V</fnm></au><au><snm>Ravel</snm><fnm>J</fnm></au></aug><source>Journal of Bacteriology</source><pubdate>2008</pubdate><volume>190</volume><fpage>6881</fpage><lpage>93</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1128/JB.00619-08</pubid><pubid idtype="pmcid">2566221</pubid><pubid idtype="pmpid" link="fulltext">18676672</pubid></pubidlist></xrefbib></bibl><bibl id="B75"><title><p>The genome sequence of Bacillus anthracis Ames and comparison to closely related 
bacteria.</p></title><aug><au><snm>Read</snm><fnm>TD</fnm></au><au><snm>Peterson</snm><fnm>SN</fnm></au><au><snm>Tourasse</snm><fnm>N</fnm></au><au><snm>Baillie</snm><fnm>LW</fnm></au><au><snm>Paulsen</snm><fnm>IT</fnm></au><au><snm>Nelson</snm><fnm>KE</fnm></au><au><snm>Tettelin</snm><fnm>H</fnm></au><au><snm>Fouts</snm><fnm>DE</fnm></au><au><snm>Eisen</snm><fnm>JA</fnm></au><au><snm>Gill</snm><fnm>SR</fnm></au><au><snm>Holtzapple</snm><fnm>EK</fnm></au><au><snm>Okstad</snm><fnm>OA</fnm></au><au><snm>Helgason</snm><fnm>E</fnm></au><au><snm>Rilstone</snm><fnm>J</fnm></au><au><snm>Wu</snm><fnm>M</fnm></au><au><snm>Kolonay</snm><fnm>JF</fnm></au><au><snm>Beanan</snm><fnm>MJ</fnm></au><au><snm>Dodson</snm><fnm>RJ</fnm></au><au><snm>Brinkac</snm><fnm>LM</fnm></au><au><snm>Gwinn</snm><fnm>M</fnm></au><au><snm>DeBoy</snm><fnm>RT</fnm></au><au><snm>Madupu</snm><fnm>R</fnm></au><au><snm>Daugherty</snm><fnm>SC</fnm></au><au><snm>Durkin</snm><fnm>AS</fnm></au><au><snm>Haft</snm><fnm>DH</fnm></au><au><snm>Nelson</snm><fnm>WC</fnm></au><au><snm>Peterson</snm><fnm>JD</fnm></au><au><snm>Pop</snm><fnm>M</fnm></au><au><snm>Khouri</snm><fnm>HM</fnm></au><au><snm>Radune</snm><fnm>D</fnm></au><etal/></aug><source>Nature</source><pubdate>2003</pubdate><volume>423</volume><fpage>81</fpage><lpage>86</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1038/nature01586</pubid><pubid idtype="pmpid" link="fulltext">12721629</pubid></pubidlist></xrefbib></bibl><bibl id="B76"><title><p>Complete genome sequence and comparative genomic analysis of an emerging human pathogen, serotype V Streptococcus 
agalactiae.</p></title><aug><au><snm>Tettelin</snm><fnm>H</fnm></au><au><snm>Masignani</snm><fnm>V</fnm></au><au><snm>Cieslewicz</snm><fnm>MJ</fnm></au><au><snm>Eisen</snm><fnm>JA</fnm></au><au><snm>Peterson</snm><fnm>S</fnm></au><au><snm>Wessels</snm><fnm>MR</fnm></au><au><snm>Paulsen</snm><fnm>IT</fnm></au><au><snm>Nelson</snm><fnm>KE</fnm></au><au><snm>Margarit</snm><fnm>I</fnm></au><au><snm>Read</snm><fnm>TD</fnm></au><au><snm>Madoff</snm><fnm>LC</fnm></au><au><snm>Wolf</snm><fnm>AM</fnm></au><au><snm>Beanan</snm><fnm>MJ</fnm></au><au><snm>Brinkac</snm><fnm>LM</fnm></au><au><snm>Daugherty</snm><fnm>SC</fnm></au><au><snm>DeBoy</snm><fnm>RT</fnm></au><au><snm>Durkin</snm><fnm>AS</fnm></au><au><snm>Kolonay</snm><fnm>JF</fnm></au><au><snm>Madupu</snm><fnm>R</fnm></au><au><snm>Lewis</snm><fnm>MR</fnm></au><au><snm>Radune</snm><fnm>D</fnm></au><au><snm>Fedorova</snm><fnm>NB</fnm></au><au><snm>Scanlan</snm><fnm>D</fnm></au><au><snm>Khouri</snm><fnm>H</fnm></au><au><snm>Mulligan</snm><fnm>S</fnm></au><au><snm>Carty</snm><fnm>HA</fnm></au><au><snm>Cline</snm><fnm>RT</fnm></au><au><snm>Van Aken</snm><fnm>SE</fnm></au><au><snm>Gill</snm><fnm>J</fnm></au><au><snm>Scarselli</snm><fnm>M</fnm></au><etal/></aug><source>Proc Natl Acad Sci USA</source><pubdate>2002</pubdate><volume>99</volume><fpage>12391</fpage><lpage>12396</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1073/pnas.182380799</pubid><pubid idtype="pmcid">129455</pubid><pubid idtype="pmpid" link="fulltext">12200547</pubid></pubidlist></xrefbib></bibl><bibl id="B77"><title><p>Yersinia aleksiciae sp. 
nov.</p></title><aug><au><snm>Sprague</snm><fnm>LD</fnm></au><au><snm>Neubauer</snm><fnm>H</fnm></au></aug><source>Int J Syst Evol Microbiol</source><pubdate>2005</pubdate><volume>55</volume><fpage>831</fpage><lpage>835</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1099/ijs.0.63220-0</pubid><pubid idtype="pmpid" link="fulltext">15774670</pubid></pubidlist></xrefbib></bibl><bibl id="B78"><title><p>Yersinia similis sp. nov.</p></title><aug><au><snm>Sprague</snm><fnm>LD</fnm></au><au><snm>Scholz</snm><fnm>HC</fnm></au><au><snm>Amann</snm><fnm>S</fnm></au><au><snm>Busse</snm><fnm>HJ</fnm></au><au><snm>Neubauer</snm><fnm>H</fnm></au></aug><source>Int J Syst Evol Microbiol</source><pubdate>2008</pubdate><volume>58</volume><fpage>952</fpage><lpage>958</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1099/ijs.0.65417-0</pubid><pubid idtype="pmpid" link="fulltext">18398201</pubid></pubidlist></xrefbib></bibl><bibl id="B79"><title><p>Yersinia massiliensis sp. nov., isolated from fresh water.</p></title><aug><au><snm>Merhej</snm><fnm>V</fnm></au><au><snm>Adekambi</snm><fnm>T</fnm></au><au><snm>Pagnier</snm><fnm>I</fnm></au><au><snm>Raoult</snm><fnm>D</fnm></au><au><snm>Drancourt</snm><fnm>M</fnm></au></aug><source>Int J Syst Evol Microbiol</source><pubdate>2008</pubdate><volume>58</volume><fpage>779</fpage><lpage>784</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1099/ijs.0.65219-0</pubid><pubid idtype="pmpid" link="fulltext">18398169</pubid></pubidlist></xrefbib></bibl><bibl id="B80"><title><p>A bile salt hydrolase of Brucella abortus contributes to the establishment of a successful infection through the oral route in mice.</p></title><aug><au><snm>Delpino</snm><fnm>MV</fnm></au><au><snm>Marchesini</snm><fnm>MI</fnm></au><au><snm>Estein</snm><fnm>SM</fnm></au><au><snm>Comerci</snm><fnm>DJ</fnm></au><au><snm>Cassataro</snm><fnm>J</fnm></au><au><snm>Fossati</snm><fnm>CA</fnm></au><au><snm>Baldi</snm><fnm>PC</fnm></au></aug><source>Infect 
Immun</source><pubdate>2007</pubdate><volume>75</volume><fpage>299</fpage><lpage>305</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1128/IAI.00952-06</pubid><pubid idtype="pmcid">1828384</pubid><pubid idtype="pmpid" link="fulltext">17088355</pubid></pubidlist></xrefbib></bibl><bibl id="B81"><title><p>The TibA adhesin/invasin from enterotoxigenic Escherichia coli is self recognizing and induces bacterial aggregation and biofilm formation.</p></title><aug><au><snm>Sherlock</snm><fnm>O</fnm></au><au><snm>Vejborg</snm><fnm>RM</fnm></au><au><snm>Klemm</snm><fnm>P</fnm></au></aug><source>Infect Immun</source><pubdate>2005</pubdate><volume>73</volume><fpage>1954</fpage><lpage>1963</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1128/IAI.73.4.1954-1963.2005</pubid><pubid idtype="pmcid">1087433</pubid><pubid idtype="pmpid" link="fulltext">15784535</pubid></pubidlist></xrefbib></bibl><bibl id="B82"><title><p>ARDB--Antibiotic Resistance Genes Database.</p></title><aug><au><snm>Liu</snm><fnm>B</fnm></au><au><snm>Pop</snm><fnm>M</fnm></au></aug><source>Nucleic Acids Res</source><pubdate>2009</pubdate><volume>37</volume><fpage>D443</fpage><lpage>447</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/nar/gkn656</pubid><pubid idtype="pmcid">2686595</pubid><pubid idtype="pmpid" link="fulltext">18832362</pubid></pubidlist></xrefbib></bibl><bibl id="B83"><title><p>Antibiotic Resistance Genes Database</p></title><url>http://ardb.cbcb.umd.edu/</url></bibl><bibl id="B84"><title><p>ACLAME: a CLAssification of Mobile genetic Elements.</p></title><aug><au><snm>Leplae</snm><fnm>R</fnm></au><au><snm>Hebrant</snm><fnm>A</fnm></au><au><snm>Wodak</snm><fnm>SJ</fnm></au><au><snm>Toussaint</snm><fnm>A</fnm></au></aug><source>Nucleic Acids Res</source><pubdate>2004</pubdate><volume>32</volume><fpage>D45</fpage><lpage>49</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/nar/gkh084</pubid><pubid idtype="pmcid">308818</pubid><pubid idtype="pmpid" 
link="fulltext">14681355</pubid></pubidlist></xrefbib></bibl><bibl id="B85"><title><p>Frameshift detection in prokaryotic genomic sequences.</p></title><aug><au><snm>Kislyuk</snm><fnm>A</fnm></au><au><snm>Lomsadze</snm><fnm>A</fnm></au><au><snm>Lapidus</snm><fnm>AL</fnm></au><au><snm>Borodovsky</snm><fnm>M</fnm></au></aug><source>Int J Bioinform Res Appl</source><pubdate>2009</pubdate><volume>5</volume><fpage>458</fpage><lpage>477</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1504/IJBRA.2009.027519</pubid><pubid idtype="pmpid" link="fulltext">19640832</pubid></pubidlist></xrefbib></bibl><bibl id="B86"><title><p>Comparative genome assembly.</p></title><aug><au><snm>Pop</snm><fnm>M</fnm></au><au><snm>Phillippy</snm><fnm>A</fnm></au><au><snm>Delcher</snm><fnm>AL</fnm></au><au><snm>Salzberg</snm><fnm>SL</fnm></au></aug><source>Brief Bioinform</source><pubdate>2004</pubdate><volume>5</volume><fpage>237</fpage><lpage>248</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/bib/5.3.237</pubid><pubid idtype="pmpid" link="fulltext">15383210</pubid></pubidlist></xrefbib></bibl><bibl id="B87"><title><p>Cd-hit: a fast program for clustering and comparing large sets of protein or nucleotide sequences.</p></title><aug><au><snm>Li</snm><fnm>W</fnm></au><au><snm>Godzik</snm><fnm>A</fnm></au></aug><source>Bioinformatics</source><pubdate>2006</pubdate><volume>22</volume><fpage>1658</fpage><lpage>1659</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/bioinformatics/btl158</pubid><pubid idtype="pmpid" link="fulltext">16731699</pubid></pubidlist></xrefbib></bibl><bibl id="B88"><title><p>Versatile and open software for comparing large genomes.</p></title><aug><au><snm>Kurtz</snm><fnm>S</fnm></au><au><snm>Phillippy</snm><fnm>A</fnm></au><au><snm>Delcher</snm><fnm>AL</fnm></au><au><snm>Smoot</snm><fnm>M</fnm></au><au><snm>Shumway</snm><fnm>M</fnm></au><au><snm>Antonescu</snm><fnm>C</fnm></au><au><snm>Salzberg</snm><fnm>SL</fnm></au></aug><source>Genome 
Biol</source><pubdate>2004</pubdate><volume>5</volume><fpage>R12</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1186/gb-2004-5-2-r12</pubid><pubid idtype="pmcid">395750</pubid><pubid idtype="pmpid" link="fulltext">14759262</pubid></pubidlist></xrefbib></bibl><bibl id="B89"><title><p>DIYA: A bacterial annotation pipeline for any genomics lab.</p></title><aug><au><snm>Stewart</snm><fnm>AC</fnm></au><au><snm>Osborne</snm><fnm>B</fnm></au><au><snm>Read</snm><fnm>TD</fnm></au></aug><source>Bioinformatics</source><pubdate>2009</pubdate><volume>25</volume><fpage>962</fpage><lpage>3</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/bioinformatics/btp097</pubid><pubid idtype="pmcid">2660880</pubid><pubid idtype="pmpid" link="fulltext">19254921</pubid></pubidlist></xrefbib></bibl><bibl id="B90"><title><p>Microbial gene identification using interpolated Markov models.</p></title><aug><au><snm>Salzberg</snm><fnm>SL</fnm></au><au><snm>Delcher</snm><fnm>AL</fnm></au><au><snm>Kasif</snm><fnm>S</fnm></au><au><snm>White</snm><fnm>O</fnm></au></aug><source>Nucleic Acids Res</source><pubdate>1998</pubdate><volume>26</volume><fpage>544</fpage><lpage>548</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/nar/26.2.544</pubid><pubid idtype="pmcid">147303</pubid><pubid idtype="pmpid" link="fulltext">9421513</pubid></pubidlist></xrefbib></bibl><bibl id="B91"><title><p>tRNAscan-SE: a program for improved detection of transfer RNA genes in genomic sequence.</p></title><aug><au><snm>Lowe</snm><fnm>TM</fnm></au><au><snm>Eddy</snm><fnm>SR</fnm></au></aug><source>Nucleic Acids Res</source><pubdate>1997</pubdate><volume>25</volume><fpage>955</fpage><lpage>964</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/nar/25.5.955</pubid><pubid idtype="pmcid">146525</pubid><pubid idtype="pmpid" link="fulltext">9023104</pubid></pubidlist></xrefbib></bibl><bibl id="B92"><title><p>RNAmmer: consistent and rapid annotation of ribosomal RNA 
genes.</p></title><aug><au><snm>Lagesen</snm><fnm>K</fnm></au><au><snm>Hallin</snm><fnm>P</fnm></au><au><snm>R&#248;dland</snm><fnm>EA</fnm></au><au><snm>Staerfeldt</snm><fnm>HH</fnm></au><au><snm>Rognes</snm><fnm>T</fnm></au><au><snm>Ussery</snm><fnm>DW</fnm></au></aug><source>Nucleic Acids Res</source><pubdate>2007</pubdate><volume>35</volume><fpage>3100</fpage><lpage>3108</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/nar/gkm160</pubid><pubid idtype="pmcid">1888812</pubid><pubid idtype="pmpid" link="fulltext">17452365</pubid></pubidlist></xrefbib></bibl><bibl id="B93"><title><p>UniRef: comprehensive and non-redundant UniProt reference clusters.</p></title><aug><au><snm>Suzek</snm><fnm>BE</fnm></au><au><snm>Huang</snm><fnm>H</fnm></au><au><snm>McGarvey</snm><fnm>P</fnm></au><au><snm>Mazumder</snm><fnm>R</fnm></au><au><snm>Wu</snm><fnm>CH</fnm></au></aug><source>Bioinformatics</source><pubdate>2007</pubdate><volume>23</volume><fpage>1282</fpage><lpage>1288</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/bioinformatics/btm098</pubid><pubid idtype="pmpid" link="fulltext">17379688</pubid></pubidlist></xrefbib></bibl><bibl id="B94"><title><p>Basic local alignment search tool.</p></title><aug><au><snm>Altschul</snm><fnm>SF</fnm></au><au><snm>Gish</snm><fnm>W</fnm></au><au><snm>Miller</snm><fnm>W</fnm></au><au><snm>Myers</snm><fnm>EW</fnm></au><au><snm>Lipman</snm><fnm>DJ</fnm></au></aug><source>J Mol Biol</source><pubdate>1990</pubdate><volume>215</volume><fpage>403</fpage><lpage>410</lpage><xrefbib><pubid idtype="pmpid">2231712</pubid></xrefbib></bibl><bibl id="B95"><title><p>Conserved Domain Database (CDD)</p></title><url>http://www.ncbi.nlm.nih.gov/sites/entrez?db=cdd</url></bibl><bibl id="B96"><title><p>Gapped BLAST and PSI-BLAST: a new generation of protein database search 
programs.</p></title><aug><au><snm>Altschul</snm><fnm>SF</fnm></au><au><snm>Madden</snm><fnm>TL</fnm></au><au><snm>Schaffer</snm><fnm>AA</fnm></au><au><snm>Zhang</snm><fnm>J</fnm></au><au><snm>Zhang</snm><fnm>Z</fnm></au><au><snm>Miller</snm><fnm>W</fnm></au><au><snm>Lipman</snm><fnm>DJ</fnm></au></aug><source>Nucleic Acids Res</source><pubdate>1997</pubdate><volume>25</volume><fpage>3389</fpage><lpage>3402</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/nar/25.17.3389</pubid><pubid idtype="pmcid">146917</pubid><pubid idtype="pmpid" link="fulltext">9254694</pubid></pubidlist></xrefbib></bibl><bibl id="B97"><title><p>Improvement of phylogenies after removing divergent and ambiguously aligned blocks from protein sequence alignments.</p></title><aug><au><snm>Talavera</snm><fnm>G</fnm></au><au><snm>Castresana</snm><fnm>J</fnm></au></aug><source>Syst Biol</source><pubdate>2007</pubdate><volume>56</volume><fpage>564</fpage><lpage>577</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1080/10635150701472164</pubid><pubid idtype="pmpid" link="fulltext">17654362</pubid></pubidlist></xrefbib></bibl><bibl id="B98"><title><p>Genome sequence of Chlamydophila caviae (Chlamydia psittaci GPIC): examining the role of niche-specific genes in the evolution of the 
Chlamydiaceae.</p></title><aug><au><snm>Read</snm><fnm>TD</fnm></au><au><snm>Myers</snm><fnm>GS</fnm></au><au><snm>Brunham</snm><fnm>RC</fnm></au><au><snm>Nelson</snm><fnm>WC</fnm></au><au><snm>Paulsen</snm><fnm>IT</fnm></au><au><snm>Heidelberg</snm><fnm>J</fnm></au><au><snm>Holtzapple</snm><fnm>E</fnm></au><au><snm>Khouri</snm><fnm>H</fnm></au><au><snm>Federova</snm><fnm>NB</fnm></au><au><snm>Carty</snm><fnm>HA</fnm></au><au><snm>Umayam</snm><fnm>LA</fnm></au><au><snm>Haft</snm><fnm>DH</fnm></au><au><snm>Peterson</snm><fnm>J</fnm></au><au><snm>Beanan</snm><fnm>MJ</fnm></au><au><snm>White</snm><fnm>O</fnm></au><au><snm>Salzberg</snm><fnm>SL</fnm></au><au><snm>Hsia</snm><fnm>RC</fnm></au><au><snm>McClarty</snm><fnm>G</fnm></au><au><snm>Rank</snm><fnm>RG</fnm></au><au><snm>Bavoil</snm><fnm>PM</fnm></au><au><snm>Fraser</snm><fnm>CM</fnm></au></aug><source>Nucleic Acids Res</source><pubdate>2003</pubdate><volume>31</volume><fpage>2134</fpage><lpage>2147</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/nar/gkg321</pubid><pubid idtype="pmcid">153749</pubid><pubid idtype="pmpid" link="fulltext">12682364</pubid></pubidlist></xrefbib></bibl><bibl id="B99"><title><p>Visualization of comparative genomic analyses by BLAST score ratio.</p></title><aug><au><snm>Rasko</snm><fnm>DA</fnm></au><au><snm>Myers</snm><fnm>GS</fnm></au><au><snm>Ravel</snm><fnm>J</fnm></au></aug><source>BMC Bioinformatics</source><pubdate>2005</pubdate><volume>6</volume><fpage>2</fpage><xrefbib><pubidlist><pubid idtype="doi">10.1186/1471-2105-6-2</pubid><pubid idtype="pmcid">545078</pubid><pubid idtype="pmpid" link="fulltext">15634352</pubid></pubidlist></xrefbib></bibl><bibl id="B100"><title><p>Yersinia aldovae (Formerly Yersinia enterocolitica-Like Group X2): a New Species of Enterobacteriaceae Isolated from Aquatic 
Ecosystems.</p></title><aug><au><snm>Bercovier</snm><fnm>H</fnm></au><au><snm>Steigerwalt</snm><fnm>AG</fnm></au><au><snm>Guiyoule</snm><fnm>A</fnm></au><au><snm>Huntley-Carter</snm><fnm>G</fnm></au><au><snm>Brenner</snm><fnm>DJ</fnm></au></aug><source>Int J Syst Bacteriol</source><pubdate>1984</pubdate><volume>34</volume><fpage>166</fpage><lpage>172</lpage></bibl><bibl id="B101"><title><p>Yersinia mollaretii sp. nov. and Yersinia bercovieri sp. nov., Formerly Called Yersinia enterocolitica Biogroups 3A and 3B.</p></title><aug><au><snm>Wauters</snm><fnm>G</fnm></au><au><snm>Janssens</snm><fnm>M</fnm></au><au><snm>Steigerwalt</snm><fnm>AG</fnm></au><au><snm>Brenner</snm><fnm>DJ</fnm></au></aug><source>Int J Syst Bacteriol</source><pubdate>1988</pubdate><volume>38</volume><fpage>424</fpage></bibl><bibl id="B102"><title><p>Yersinia frederiksenii: A new species of enterobacteriaceae composed of rhamnose-positive strains (formerly called atypical Yersinia enterocolitica or Yersinia enterocolitica -Like).</p></title><aug><au><snm>Ursing</snm><fnm>J</fnm></au><au><snm>Brenner</snm><fnm>DJ</fnm></au><au><snm>Bercovier</snm><fnm>H</fnm></au><au><snm>Fanning</snm><fnm>GR</fnm></au><au><snm>Steigerwalt</snm><fnm>AG</fnm></au><au><snm>Brault</snm><fnm>J</fnm></au><au><snm>Mollaret</snm><fnm>HH</fnm></au></aug><source>Current Microbiology</source><pubdate>1980</pubdate><volume>4</volume><fpage>213</fpage><lpage>217</lpage><xrefbib><pubid idtype="doi">10.1007/BF02605859</pubid></xrefbib></bibl><bibl id="B103"><title><p>Yersinia intermedia: A new species of enterobacteriaceae composed of rhamnose-positive, melibiose-positive, raffinose-positive strains (formerly called Yersinia enterocolitica or Yersinia enterocolitica 
-like).</p></title><aug><au><snm>Brenner</snm><fnm>DJ</fnm></au><au><snm>Bercovier</snm><fnm>HH</fnm></au><au><snm>Ursing</snm><fnm>J</fnm></au><au><snm>Alonso</snm><fnm>JM</fnm></au><au><snm>Steigerwalt</snm><fnm>AG</fnm></au><au><snm>Fanning</snm><fnm>GR</fnm></au><au><snm>Carter</snm><fnm>GP</fnm></au><au><snm>Mollaret</snm><fnm>HH</fnm></au></aug><source>Current Microbiology</source><pubdate>1980</pubdate><volume>4</volume><fpage>207</fpage><lpage>212</lpage><xrefbib><pubid idtype="doi">10.1007/BF02605858</pubid></xrefbib></bibl><bibl id="B104"><title><p>Yersinia kristensenii: A new species of enterobacteriaceae composed of sucrose-negative strains (formerly called atypical Yersinia enterocolitica or Yersinia enterocolitica -Like).</p></title><aug><au><snm>Bercovier</snm><fnm>H</fnm></au><au><snm>Ursing</snm><fnm>J</fnm></au><au><snm>Brenner</snm><fnm>DJ</fnm></au><au><snm>Steigerwalt</snm><fnm>AG</fnm></au><au><snm>Fanning</snm><fnm>GR</fnm></au><au><snm>Carter</snm><fnm>GP</fnm></au><au><snm>Mollaret</snm><fnm>HH</fnm></au></aug><source>Current Microbiology</source><pubdate>1980</pubdate><volume>4</volume><fpage>219</fpage><lpage>224</lpage><xrefbib><pubid idtype="doi">10.1007/BF02605860</pubid></xrefbib></bibl><bibl id="B105"><title><p>Yersinia rohdei sp. nov. 
isolated from human and dog feces and surface water.</p></title><aug><au><snm>Aleksic</snm><fnm>S</fnm></au><au><snm>Steigerwalt</snm><fnm>AG</fnm></au><au><snm>Bockemuehl</snm><fnm>J</fnm></au></aug><source>Int J Syst Bacteriol</source><pubdate>1987</pubdate></bibl><bibl id="B106"><title><p>DNAPlotter: circular and linear interactive genome visualization.</p></title><aug><au><snm>Carver</snm><fnm>T</fnm></au><au><snm>Thomson</snm><fnm>N</fnm></au><au><snm>Bleasby</snm><fnm>A</fnm></au><au><snm>Berriman</snm><fnm>M</fnm></au><au><snm>Parkhill</snm><fnm>J</fnm></au></aug><source>Bioinformatics</source><pubdate>2009</pubdate><volume>25</volume><fpage>119</fpage><lpage>120</lpage><xrefbib><pubidlist><pubid idtype="doi">10.1093/bioinformatics/btn578</pubid><pubid idtype="pmcid">2612626</pubid><pubid idtype="pmpid" link="fulltext">18990721</pubid></pubidlist></xrefbib></bibl></refgrp>
   </bm>
</art>
