<?xml version="1.0" encoding="UTF-8"?>
<record
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
    xmlns="http://www.loc.gov/MARC21/slim">

  <leader>06432cam a2200601Ii 4500</leader>
  <controlfield tag="001">ocn944961030</controlfield>
  <controlfield tag="003">OCoLC</controlfield>
  <controlfield tag="005">20190328114814.0</controlfield>
  <controlfield tag="006">m     o  d        </controlfield>
  <controlfield tag="007">cr cnu|||unuuu</controlfield>
  <controlfield tag="008">160317t20162016mau     ob    001 0 eng d</controlfield>
  <datafield tag="040" ind1=" " ind2=" ">
    <subfield code="a">N$T</subfield>
    <subfield code="b">eng</subfield>
    <subfield code="e">rda</subfield>
    <subfield code="e">pn</subfield>
    <subfield code="c">N$T</subfield>
    <subfield code="d">EBLCP</subfield>
    <subfield code="d">N$T</subfield>
    <subfield code="d">OPELS</subfield>
    <subfield code="d">OCLCF</subfield>
    <subfield code="d">YDXCP</subfield>
    <subfield code="d">CDX</subfield>
    <subfield code="d">UMI</subfield>
    <subfield code="d">AZK</subfield>
    <subfield code="d">TOH</subfield>
    <subfield code="d">STF</subfield>
    <subfield code="d">DEBBG</subfield>
    <subfield code="d">COO</subfield>
    <subfield code="d">DEBSZ</subfield>
    <subfield code="d">VGM</subfield>
    <subfield code="d">IUL</subfield>
    <subfield code="d">VT2</subfield>
    <subfield code="d">U3W</subfield>
    <subfield code="d">D6H</subfield>
    <subfield code="d">UOK</subfield>
    <subfield code="d">CEF</subfield>
    <subfield code="d">KSU</subfield>
    <subfield code="d">OCLCQ</subfield>
    <subfield code="d">AU@</subfield>
    <subfield code="d">OCLCQ</subfield>
    <subfield code="d">WYU</subfield>
    <subfield code="d">TKN</subfield>
  </datafield>
  <datafield tag="019" ind1=" " ind2=" ">
    <subfield code="a">961332310</subfield>
    <subfield code="a">961514762</subfield>
  </datafield>
  <datafield tag="020" ind1=" " ind2=" ">
    <subfield code="a">9780128038543</subfield>
    <subfield code="q">(electronic bk.)</subfield>
  </datafield>
  <datafield tag="020" ind1=" " ind2=" ">
    <subfield code="a">0128038543</subfield>
    <subfield code="q">(electronic bk.)</subfield>
  </datafield>
  <datafield tag="020" ind1=" " ind2=" ">
    <subfield code="a">0128037814</subfield>
  </datafield>
  <datafield tag="020" ind1=" " ind2=" ">
    <subfield code="a">9780128037812</subfield>
  </datafield>
  <datafield tag="020" ind1=" " ind2=" ">
    <subfield code="z">9780128037812</subfield>
  </datafield>
  <datafield tag="035" ind1=" " ind2=" ">
    <subfield code="a">(OCoLC)944961030</subfield>
    <subfield code="z">(OCoLC)961332310</subfield>
    <subfield code="z">(OCoLC)961514762</subfield>
  </datafield>
  <datafield tag="050" ind1=" " ind2="4">
    <subfield code="a">QA76.76.S46</subfield>
  </datafield>
  <datafield tag="072" ind1=" " ind2="7">
    <subfield code="a">COM</subfield>
    <subfield code="x">051390</subfield>
    <subfield code="2">bisacsh</subfield>
  </datafield>
  <datafield tag="072" ind1=" " ind2="7">
    <subfield code="a">COM</subfield>
    <subfield code="x">051440</subfield>
    <subfield code="2">bisacsh</subfield>
  </datafield>
  <datafield tag="072" ind1=" " ind2="7">
    <subfield code="a">COM</subfield>
    <subfield code="x">051230</subfield>
    <subfield code="2">bisacsh</subfield>
  </datafield>
  <datafield tag="082" ind1="0" ind2="4">
    <subfield code="a">005.3</subfield>
    <subfield code="2">23</subfield>
  </datafield>
  <datafield tag="100" ind1="1" ind2=" ">
    <subfield code="a">Berman, Jules J.,</subfield>
    <subfield code="e">author.</subfield>
  </datafield>
  <datafield tag="245" ind1="1" ind2="0">
    <subfield code="a">Data simplification</subfield>
    <subfield code="h">[electronic resource] :</subfield>
    <subfield code="b">taming information with open source tools /</subfield>
    <subfield code="c">Jules J. Berman.</subfield>
  </datafield>
  <datafield tag="264" ind1=" " ind2="1">
    <subfield code="a">Cambridge, MA :</subfield>
    <subfield code="b">Morgan Kaufmann is an imprint of Elsevier,</subfield>
    <subfield code="c">2016.</subfield>
  </datafield>
  <datafield tag="264" ind1=" " ind2="4">
    <subfield code="c">&#xA9;2016</subfield>
  </datafield>
  <datafield tag="300" ind1=" " ind2=" ">
    <subfield code="a">1 online resource</subfield>
  </datafield>
  <datafield tag="336" ind1=" " ind2=" ">
    <subfield code="a">text</subfield>
    <subfield code="b">txt</subfield>
    <subfield code="2">rdacontent</subfield>
  </datafield>
  <datafield tag="337" ind1=" " ind2=" ">
    <subfield code="a">computer</subfield>
    <subfield code="b">c</subfield>
    <subfield code="2">rdamedia</subfield>
  </datafield>
  <datafield tag="338" ind1=" " ind2=" ">
    <subfield code="a">online resource</subfield>
    <subfield code="b">cr</subfield>
    <subfield code="2">rdacarrier</subfield>
  </datafield>
  <datafield tag="504" ind1=" " ind2=" ">
    <subfield code="a">Includes bibliographical references and index.</subfield>
  </datafield>
  <datafield tag="588" ind1="0" ind2=" ">
    <subfield code="a">Online resource; title from PDF title page (EBSCO, viewed March 21, 2016).</subfield>
  </datafield>
  <datafield tag="520" ind1=" " ind2=" ">
    <subfield code="a">Data Simplification: Taming Information With Open Source Tools addresses the simple fact that modern data is too big and complex to analyze in its native form. Data simplification is the process whereby large and complex data is rendered usable. Complex data must be simplified before it can be analyzed, but the process of data simplification is anything but simple, requiring a specialized set of skills and tools. This book provides data scientists from every scientific discipline with the methods and tools to simplify their data for immediate analysis or long-term storage in a form that can be readily repurposed or integrated with other data. Drawing upon years of practical experience, and using numerous examples and use cases, Jules Berman discusses the principles, methods, and tools that must be studied and mastered to achieve data simplification, open source tools, free utilities and snippets of code that can be reused and repurposed to simplify data, natural language processing and machine translation as a tool to simplify data, and data summarization and visualization and the role they play in making data useful for the end user.</subfield>
  </datafield>
  <datafield tag="505" ind1="0" ind2=" ">
    <subfield code="a">Front cover; Data Simplification: Taming Information With Open Source Tools; Copyright; Dedication; Contents; Foreword; Preface; Organization of this book; Chapter Organization; How to Read this Book; Nota Bene; Glossary; References; Author Biography; Chapter 1: The Simple Life; 1.1. Simplification Drives Scientific Progress; 1.2. The Human Mind is a Simplifying Machine; 1.3. Simplification in Nature; 1.4. The Complexity Barrier; 1.5. Getting Ready; Open Source Tools; Perl; Python; Ruby; Text Editors; OpenOffice; LibreOffice; Command Line Utilities; Cygwin, Linux Emulation for Windows.</subfield>
  </datafield>
  <datafield tag="505" ind1="8" ind2=" ">
    <subfield code="a">DOS Batch Scripts; Linux Bash Scripts; Interactive Line Interpreters; Package Installers; System Calls; Glossary; References; Chapter 2: Structuring Text; 2.1. The Meaninglessness of Free Text; 2.2. Sorting Text, the Impossible Dream; 2.3. Sentence Parsing; 2.4. Abbreviations; 2.5. Annotation and the Simple Science of Metadata; 2.6. Specifications Good, Standards Bad; Open Source Tools; ASCII; Regular Expressions; Format Commands; Converting Nonprintable Files to Plain-Text; Dublin Core; Glossary; References; Chapter 3: Indexing Text; 3.1. How Data Scientists Use Indexes.</subfield>
  </datafield>
  <datafield tag="505" ind1="8" ind2=" ">
    <subfield code="a">3.2. Concordances and Indexed Lists; 3.3. Term Extraction and Simple Indexes; 3.4. Autoencoding and Indexing with Nomenclatures; 3.5. Computational Operations on Indexes; Open Source Tools; Word Lists; Doublet Lists; Ngram Lists; Glossary; References; Chapter 4: Understanding Your Data; 4.1. Ranges and Outliers; 4.2. Simple Statistical Descriptors; 4.3. Retrieving Image Information; 4.4. Data Profiling; 4.5. Reducing Data; Open Source Tools; Gnuplot; MatPlotLib; R, for Statistical Programming; Numpy; Scipy; ImageMagick; Displaying Equations in LaTex; Normalized Compression Distance.</subfield>
  </datafield>
  <datafield tag="505" ind1="8" ind2=" ">
    <subfield code="a">Pearson's Correlation; The Ridiculously Simple Dot Product; Glossary; References; Chapter 5: Identifying and Deidentifying Data; 5.1. Unique Identifiers; 5.2. Poor Identifiers, Horrific Consequences; 5.3. Deidentifiers and Reidentifiers; 5.4. Data Scrubbing; 5.5. Data Encryption and Authentication; 5.6. Timestamps, Signatures, and Event Identifiers; Open Source Tools; Pseudorandom Number Generators; UUID; Encryption and Decryption with OpenSSL; One-Way Hash Implementations; Steganography; Glossary; References; Chapter 6: Giving Meaning to Data; 6.1. Meaning and Triples.</subfield>
  </datafield>
  <datafield tag="505" ind1="8" ind2=" ">
    <subfield code="a">6.2. Driving Down Complexity With Classifications; 6.3. Driving Up Complexity With Ontologies; 6.4. The Unreasonable Effectiveness of Classifications; 6.5. Properties That Cross Multiple Classes; Open Source Tools; Syntax for Triples; RDF Schema; RDF Parsers; Visualizing Class Relationships; Glossary; References; Chapter 7: Object-oriented Data; 7.1. The Importance of Self-Explaining Data; 7.2. Introspection and Reflection; 7.3. Object-Oriented Data Objects; 7.4. Working With Object-Oriented Data; Open Source Tools; Persistent Data; SQLite Databases; Glossary; References.</subfield>
  </datafield>
  <datafield tag="650" ind1=" " ind2="0">
    <subfield code="a">Open source software.</subfield>
  </datafield>
  <datafield tag="650" ind1=" " ind2="0">
    <subfield code="a">Data mining.</subfield>
  </datafield>
  <datafield tag="650" ind1=" " ind2="0">
    <subfield code="a">Database management.</subfield>
  </datafield>
  <datafield tag="650" ind1=" " ind2="7">
    <subfield code="a">COMPUTERS</subfield>
    <subfield code="x">Programming</subfield>
    <subfield code="x">Open Source.</subfield>
    <subfield code="2">bisacsh</subfield>
  </datafield>
  <datafield tag="650" ind1=" " ind2="7">
    <subfield code="a">COMPUTERS</subfield>
    <subfield code="x">Software Development &amp; Engineering</subfield>
    <subfield code="x">Tools.</subfield>
    <subfield code="2">bisacsh</subfield>
  </datafield>
  <datafield tag="650" ind1=" " ind2="7">
    <subfield code="a">COMPUTERS</subfield>
    <subfield code="x">Software Development &amp; Engineering</subfield>
    <subfield code="x">General.</subfield>
    <subfield code="2">bisacsh</subfield>
  </datafield>
  <datafield tag="650" ind1=" " ind2="7">
    <subfield code="a">Data mining.</subfield>
    <subfield code="2">fast</subfield>
    <subfield code="0">(OCoLC)fst00887946</subfield>
  </datafield>
  <datafield tag="650" ind1=" " ind2="7">
    <subfield code="a">Database management.</subfield>
    <subfield code="2">fast</subfield>
    <subfield code="0">(OCoLC)fst00888037</subfield>
  </datafield>
  <datafield tag="650" ind1=" " ind2="7">
    <subfield code="a">Open source software.</subfield>
    <subfield code="2">fast</subfield>
    <subfield code="0">(OCoLC)fst01046097</subfield>
  </datafield>
  <datafield tag="655" ind1=" " ind2="4">
    <subfield code="a">Electronic books.</subfield>
  </datafield>
  <datafield tag="776" ind1="0" ind2="8">
    <subfield code="i">Print version:</subfield>
    <subfield code="a">Berman, Jules J.</subfield>
    <subfield code="t">Data simplification : taming information with open source tools.</subfield>
    <subfield code="d">Cambridge, MA : Elsevier, [2016]</subfield>
    <subfield code="z">9780128037812</subfield>
    <subfield code="w">(DLC)   18934818</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
    <subfield code="3">ScienceDirect</subfield>
    <subfield code="u">http://www.sciencedirect.com/science/book/9780128037812</subfield>
  </datafield>
  <datafield tag="999" ind1=" " ind2=" ">
    <subfield code="c">247302</subfield>
    <subfield code="d">247302</subfield>
  </datafield>
</record>
