<?xml version="1.0" encoding="UTF-8"?>
<course>
  <academic-career-val type="integer">3</academic-career-val>
  <assumed-knowledge-and-required-skills></assumed-knowledge-and-required-skills>
  <available-through-customised-graduate-programs type="integer">1</available-through-customised-graduate-programs>
  <co-teaching-course-id type="integer" nil="true"></co-teaching-course-id>
  <consent-description>Please contact MATHSadmin@maths.anu.edu.au for consent to enrol in this course</consent-description>
  <consent-required type="boolean">true</consent-required>
  <corequisites></corequisites>
  <cost-considerations></cost-considerations>
  <course-code>MATH6210</course-code>
  <course-description>&lt;span&gt;The main focus of the course will be supervised learning, primarily&amp;nbsp;for classification.&amp;nbsp; The emphasis will be on practical applications of&amp;nbsp;the methodologies that are described, with the R system used for the&amp;nbsp;computations.&lt;/span&gt;&lt;span&gt; &lt;p&gt;&lt;span&gt;&lt;span&gt;Attention will be given to:&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;&lt;ol&gt;&lt;li&gt;&lt;span&gt;&lt;span&gt;Generalizability and predictive accuracy, in the practical contexts&amp;nbsp;in which methods are applied.&lt;/span&gt;&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span&gt;&lt;span&gt;Low-dimensional visual representation of results, as an aid to&amp;nbsp;diagnosis and insight.&lt;/span&gt;&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span&gt;&lt;span&gt;Interpretability of model parameters, including potential for&amp;nbsp;misinterpretation.&lt;/span&gt;&lt;/span&gt;&lt;/li&gt;&lt;/ol&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;There will be very limited attention to regression methods with a&amp;nbsp;continuous outcome variable.&amp;nbsp;&amp;nbsp;Relevant statistical theory will mostly be assumed and described rather&amp;nbsp;than derived mathematically.&amp;nbsp; There will be somewhat more attention to&amp;nbsp;the mathematical derivation and description of algorithms.&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt; &lt;p&gt;Topics to be covered include:&lt;/p&gt;&lt;ul&gt;&lt;li&gt;Basic statistical ideas - populations, distributions, samples and&amp;nbsp;random samples&lt;/li&gt;&lt;li&gt;Classification models and methods - including: linear&amp;nbsp;discriminant&amp;nbsp;analysis; trees; random forests; neural nets; boosting and&amp;nbsp;bagging&amp;nbsp;approaches; support vector machines.&lt;/li&gt;&lt;li&gt;Linear regression approaches to classification, compared with&amp;nbsp;linear discriminant analysis.&lt;/li&gt;&lt;li&gt;The training/test approach to assessing accuracy, 
and&amp;nbsp;cross-validation.&lt;/li&gt;&lt;li&gt;Strategies in the (common) situation where source and target&amp;nbsp;populations differ, typically in time but in other respects also.&lt;/li&gt;&lt;li&gt;Unsupervised models - k-means, association rules, hierarchical&amp;nbsp;clustering, model-based clusters.&lt;/li&gt;&lt;li&gt;Low-dimensional views of classification results - distance&amp;nbsp;methods&amp;nbsp;and ordination.&lt;/li&gt;&lt;li&gt;Strategies for working with large data sets.&lt;/li&gt;&lt;li&gt;Practical approaches to classification with real-life data sets,&amp;nbsp;using different methods to gain different insights into presentation.&lt;/li&gt;&lt;li&gt;Privacy and security.&lt;/li&gt;&lt;li&gt;Use of the R system for handling the calculations.&lt;/li&gt;&lt;/ul&gt;&lt;p&gt;Note: Graduate students attend joint classes with undergraduates but will be assessed separately.&lt;/p&gt;&lt;/span&gt;&lt;/span&gt;</course-description>
  <course-group nil="true"></course-group>
  <eligibility>Bachelor degree with third-year Mathematics.</eligibility>
  <filled-flag type="integer">1</filled-flag>
  <first-year-course type="boolean">false</first-year-course>
  <id type="integer">12657</id>
  <incompatibility></incompatibility>
  <indicative-assessment>&lt;p&gt;Assessment will be based on:&lt;/p&gt;&lt;ul&gt;&lt;li&gt;3 Assignments (60%; LO 1-5)&lt;/li&gt;&lt;li&gt;Presentation (40%; LO 1-5)&lt;/li&gt;&lt;/ul&gt;</indicative-assessment>
  <indicative-reading-list></indicative-reading-list>
  <is-active type="integer">1</is-active>
  <is-public type="integer">1</is-public>
  <learning-outcomes>&lt;p&gt;On satisfying the requirements of this course, students will have the knowledge and skills to:&lt;/p&gt;1. Explain the fundamental issues involved in the use of the training/test methodology, cross-validation and the bootstrap to provide accuracy assessments.&lt;br /&gt;2. Understand and explain ideas of source and target sample, and their relevance to the practical application of classification and other data mining techniques. &lt;br /&gt;3. Demonstrate accurate and efficient use of classification and related data mining techniques, using the R system for the computations.&lt;br /&gt;4. Demonstrate capacity for mathematical reasoning through analyzing, proving and explaining concepts from the theory that underpins classification and related data mining methods.&lt;br /&gt;5. Apply problem-solving using classification and related data mining techniques to diverse situations in business, biology, engineering and other sciences.</learning-outcomes>
  <lock-version type="integer">0</lock-version>
  <long-title>Data Mining</long-title>
  <max-units type="integer">6</max-units>
  <min-units type="integer">6</min-units>
  <other-information></other-information>
  <preliminary-reading></preliminary-reading>
  <prescribed-texts></prescribed-texts>
  <progress-units type="integer">6</progress-units>
  <quota></quota>
  <recommended-courses></recommended-courses>
  <requisite-statement>Third year Mathematics is required.&amp;nbsp; </requisite-statement>
  <restricted-program-entry type="integer" nil="true"></restricted-program-entry>
  <short-title>Data Mining</short-title>
  <student-contribution-band>Band 2 NP</student-contribution-band>
  <subject>Mathematics</subject>
  <technology-requirements></technology-requirements>
  <updated-by nil="true"></updated-by>
  <version type="integer" nil="true"></version>
  <workload></workload>
  <year type="integer">2010</year>
</course>
