@Book{ baldi:03:modeling,
  author    = {P. Baldi and P. Frasconi and P. Smyth},
  title     = {Modeling the Internet and the Web: Probabilistic Methods and
               Algorithms},
  year      = {2003},
  publisher = {Wiley},
  url       = {http://ibook.ics.uci.edu/}
}

@InProceedings{ aggarwal:01:intelligentcrawling,
  author    = {Charu C. Aggarwal and Fatima Al-Garawi and Philip S. Yu},
  title     = {Intelligent Crawling On the World Wide Web with Arbitrary
               Predicates},
  booktitle = {Proceedings of the Tenth International World Wide Web
               Conference},
  year      = {2001}
}

@InProceedings{ akaike77,
  author    = {H. Akaike},
  title     = {Information theory and an extension of the maximum likelihood
               principle},
  year      = {1973},
  booktitle = {Proc. Second International Symposium on Information Theory},
  pages     = {267--281}
}

@InProceedings{ allwein:00:multiclass,
  author    = {E. L. Allwein and R. E. Schapire and Y. Singer},
  title     = {Reducing Multiclass to Binary: {A} Unifying Approach for
               Margin Classifiers},
  booktitle = {Proc. 17th International Conf. on Machine Learning},
  publisher = {Morgan Kaufmann, San Francisco, CA},
  pages     = {9--16},
  year      = {2000}
}

@InProceedings{ amento:00:does,
  author    = {Brian Amento and Loren Terveen and Will Hill},
  title     = {Does authority mean quality? predicting expert quality
               ratings of Web documents},
  booktitle = {Proceedings of the 23rd annual international ACM SIGIR
               conference on Research and development in information
               retrieval},
  location  = {Athens, Greece},
  year      = {2000},
  publisher = {ACM Press},
  isbn      = {1-58113-226-3},
  pages     = {296--303},
  doi       = {10.1145/345508.345603}
}

@InProceedings{ androutsopoulos:00:spam,
  author    = {I. Androutsopoulos and J. Koutsias and K. V. Chandrinos and
               D. Spyropoulos},
  title     = {An experimental comparison of naive {Bayesian} and
               keyword-based anti-spam filtering with personal e-mail
               messages},
  booktitle = {Proceedings of the 23rd ACM SIGIR Annual Conference},
  pages     = {160--167},
  year      = 2000
}

@Article{ apte:94:decisionrules,
  author    = {C. Apt{\'e} and F. Damerau and S. M. Weiss},
  title     = {Automated Learning of Decision Rules for Text Categorization},
  journal   = {ACM Transactions on Information Systems},
  volume    = 12,
  number    = 3,
  pages     = {233--251},
  month     = jul,
  year      = 1994,
  coden     = {ATISET},
  issn      = {1046-8188},
  bibdate   = {Sat Jan 16 19:04:41 MST 1999},
  note      = {Special Issue on Text Categorization.},
  url       = {http://www.acm.org:80},
  abstract  = {We describe the results of extensive experiments using
               optimized rule-based induction methods on large document
               collections. The goal of these methods is to discover
               automatically classification patterns that can be used for
               general document categorization or personalized filtering of
               free text. Previous reports indicate that human-engineered
               rule-based systems, requiring many man-years of developmental
               efforts, have been successfully built to `read' documents and
               assign topics to them. We show that machine-generated decision
               rules appear comparable to human performance, while using the
               identical rule-based representation. In comparison with other
               machine-learning techniques, results on a key benchmark from
               the Reuters collection show a large gain in performance, from
               a previously reported 67\% recall\slash precision breakeven
               point to 80.5\%. In the context of a very high-dimensional
               feature space, several methodological alternatives are
               examined, including universal versus local dictionaries, and
               binary versus frequency-related features.},
  acknowledgement = ack-nhfb,
  affiliation = {IBM T. J. Watson Research Cent},
  affiliationaddress = {Yorktown Heights, NY, USA},
  classification = {461.4; 722.1; 723.4; 901.1.1; 902.2; 903.1},
  journalabr = {ACM Trans Inf Syst},
  keywords  = {Classification (of information); Data acquisition; Data
               storage equipment; Decision support systems; Human
               engineering; Information retrieval systems; Knowledge based
               systems; Learning systems; Man machine systems; Performance;
               Reuters collection; Societies and institutions; Standards;
               Terminology; Text categorization}
}

@InProceedings{ arasu:02:pagerank,
  author    = {Arvind Arasu and Jasmine Novak and Andrew Tomkins and John
               Tomlin},
  title     = {PageRank Computation and the Structure of the Web:
               Experiments and Algorithms},
  year      = {2002},
  booktitle = {Proceedings of the Eleventh International World Wide Web
               Conference}
}

@InProceedings{ araujo:97:searching,
  author    = {M. D. Ara{\'u}jo and G. Navarro and N. Ziviani},
  title     = {Large text searching allowing errors},
  booktitle = {Proceedings of the 4th South American Workshop on String
               Processing},
  editor    = {R. Baeza-Yates},
  publisher = {Carleton University Press International Informatics Series},
  pages     = {2--20},
  year      = 1997
}

@InProceedings{ armstrong:95:webwatcher,
  author    = {Robert Armstrong and Dayne Freitag and Thorsten Joachims and
               Tom Mitchell},
  title     = {{WebWatcher}: {A} Learning Apprentice for the World Wide Web},
  booktitle = {Proceedings of the 1995 AAAI Spring Symposium on Information
               Gathering from Heterogeneous, Distributed Environments},
  year      = {1995},
  pages     = {6--12},
  url       = {http://www.cs.cmu.edu/afs/cs/project/theo-6/web-agent/www/webagent-plus.ps.Z}
}

% NOTE(review): the title below was truncated in the previous revision
% ("... for combining the results."); it has been completed from the
% published SIGIR 2000 paper -- confirm against the proceedings.
@InProceedings{ aslam:00:bayesmetasearch,
  author    = {Javed A. Aslam and Mark H. Montague},
  title     = {Bayes optimal metasearch: a probabilistic model for combining
               the results of multiple retrieval systems},
  pages     = {379--381},
  editor    = {Nicholas Belkin and Peter Ingwersen and Mun-Kwe Leong},
  booktitle = {Proceedings of the 23rd Annual International Conference on
               Research and Development in Information Retrieval
               ({SIGIR}-00)},
  month     = jul # "~24--28",
  series    = {ACM SIGIR},
  volume    = {special issue, v. 34},
  publisher = {ACM Press},
  address   = {N.Y.},
  year      = {2000}
}

@InProceedings{ aslam:01:modelsmetasearch,
  author    = {Javed A. Aslam and Mark Montague},
  title     = {Models for metasearch},
  booktitle = {Proceedings of the 24th annual international ACM SIGIR
               conference on Research and development in information
               retrieval},
  year      = {2001},
  isbn      = {1-58113-331-6},
  pages     = {276--284},
  location  = {New Orleans, Louisiana, United States},
  doi       = {10.1145/383952.384007},
  publisher = {ACM Press}
}

@InProceedings{ attardi:99:context, author = "G. Attardi and A. Gull{\'{\i}} and F. Sebastiani", title = "Automatic {W}eb Page Categorization by Link and Context Analysis", booktitle = "Proceedings of THAI-99, 1st European Symposium on Telematics, Hypermedia and Artificial Intelligence", editor = "Chris Hutchison and Gaetano Lanzarone", year = "1999", address = "Varese, IT", pages = "105--119", url = "http://faure.iei.pi.cnr.it/~fabrizio/Publications/THAI99.pdf" , abstract = "Assistance in retrieving documents on the World Wide Web is provided either by search engines, through keyword-based queries, or by catalogues, which organize documents into hierarchical collections. Maintaining catalogues manually is becoming increasingly difficult, due to the sheer amount of material on the Web; it is thus becoming necessary to resort to techniques for the automatic classification of documents. Automatic classification is traditionally performed by extracting the information for representing a document (``indexing'') from the document itself. 
The paper describes the novel technique of categorization by context, which instead extracts useful information for classifying a document from the context where a URL referring to it appears. We present the results of experimenting with Theseus, a classifier that exploits this technique." }

@Book{ barlow:75:reliability,
  author    = {R. E. Barlow and F. Proschan},
  title     = {Statistical Theory of Reliability and Life Testing},
  year      = {1975},
  publisher = {Holt, Rinehart and Winston}
}

@Book{ bellman:57:dp,
  author    = {R. E. Bellman},
  title     = {Dynamic Programming},
  year      = {1957},
  address   = {Princeton, NJ},
  publisher = {Princeton University Press}
}

@Misc{ bergman:00:deep,
  author       = {M. K. Bergman},
  title        = {The {Deep} {Web}: {Surfacing} {Hidden} {Value}},
  key          = {BrightPlanet White Paper},
  howpublished = {\url{http://www.completeplanet.com/Tutorials/DeepWeb/}},
  links        = {title:www:http://www.completeplanet.com/Tutorials/DeepWeb/},
  entered-by   = {Sriram Raghavan},
  year         = {2000}
}

@Misc{ berners-lee:94:uri,
  author = {T. Berners-Lee},
  title  = {{Universal Resource Identifiers in WWW: A Unifying Syntax for
            the Expression of Names and Addresses of Objects on the Network
            as used in the World-Wide Web}},
  year   = {1994},
  url    = {http://www.ietf.org/rfc/rfc1630.txt},
  note   = {RFC 1630. Available from
            \url{http://www.ietf.org/rfc/rfc1630.txt}}
}

@Misc{ berners-lee:98:uri,
  author = {T. Berners-Lee and R. Fielding and L. Masinter},
  title  = {{Uniform Resource Identifiers (URI): Generic Syntax}},
  year   = {1998},
  url    = {http://www.ietf.org/rfc/rfc2396.txt},
  note   = {RFC 2396. Available from
            \url{http://www.ietf.org/rfc/rfc2396.txt}}
}

% The citation key keeps its historical spelling so that existing citations
% continue to resolve; only the author name is corrected.
@Misc{ bernes-lee:94:url,
  author = {T. Berners-Lee and L. Masinter and M. McCahill},
  title  = {Uniform Resource Locators ({URL})},
  year   = {1994},
  url    = {http://www.w3.org/pub/WWW/Addressing/rfc1738.txt},
  note   = {RFC 1738. Available from
            \url{http://www.w3.org/Addressing/rfc1738.txt}}
}

@InProceedings{ bharat:98:distillation,
  author    = {Krishna Bharat and Monika R. Henzinger},
  title     = {Improved algorithms for topic distillation in a hyperlinked
               environment},
  booktitle = {Proceedings of the 21st annual international ACM SIGIR
               conference on Research and development in information
               retrieval},
  location  = {Melbourne, Australia},
  year      = {1998},
  publisher = {ACM Press},
  isbn      = {1-58113-015-5},
  pages     = {104--111},
  doi       = {10.1145/290941.290972}
}

@InProceedings{ bharat:98:overlap,
  author    = {K. Bharat and A. Broder},
  title     = {A technique for measuring the relative size and overlap of
               public web search engines},
  booktitle = {Proceedings of the 7th International World Wide Web
               Conference},
  year      = {1998},
  url       = {http://www7.scu.edu.au/programme/fullpapers/1937/com1937.htm},
  address   = {Brisbane, Australia},
  pages     = {379--388}
}

@TechReport{ bianchini:01:pagerank,
  author      = {M. Bianchini and M. Gori and F. Scarselli},
  title       = {Inside Google's Web Page Scoring System},
  institution = {Dipartimento di Ingegneria dell'Informazione, Universit\`a
                 di Siena},
  year        = {2001}
}

@InProceedings{ bianchini:02:pagerank,
  author    = {Monica Bianchini and Marco Gori and Franco Scarselli},
  title     = {PageRank: A Circuital Analysis},
  year      = {2002},
  booktitle = {Proceedings of the Eleventh International World Wide Web
               Conference}
}

@InProceedings{ blum:98:cotraining,
  author    = {Avrim Blum and Tom Mitchell},
  title     = {Combining labeled and unlabeled data with co-training},
  pages     = {92--100},
  isbn      = {1-58113-057-0},
  booktitle = {Proceedings of the 11th Annual Conference on Computational
               Learning Theory ({COLT}-98)},
  month     = jul # "~24--26",
  publisher = {ACM Press},
  address   = {New York},
  year      = {1998}
}

@Article{ bollacker:00:discovering,
  author  = {Kurt Bollacker and Steve Lawrence and C. Lee Giles},
  title   = {Discovering Relevant Scientific Literature on the Web},
  journal = {IEEE Intelligent Systems},
  volume  = {15},
  number  = {2},
  month   = mar # "/" # apr,
  pages   = {42--47},
  year    = {2000}
}

@InProceedings{ bollacker:98:citeseer,
  author    = {Kurt D. Bollacker and Steve Lawrence and C. Lee Giles},
  title     = {{CiteSeer}: An Autonomous {Web} Agent for Automatic Retrieval
               and Identification of Interesting Publications},
  pages     = {116--123},
  isbn      = {0-89791-983-1},
  editor    = {Katia P. Sycara and Michael Wooldridge},
  booktitle = {Proceedings of the 2nd International Conference on Autonomous
               Agents (Agents'98)},
  month     = may # "~9--13",
  publisher = {ACM Press},
  address   = {New York},
  year      = {1998},
  url       = {http://www.acm.org/pubs/articles/proceedings/ai/280765/p116-bollacker/p116-bollacker.pdf}
}

@InProceedings{ borodin:01:findauthoritieshubs,
  author    = {Allan Borodin and Gareth O. Roberts and Jeffrey S. Rosenthal
               and Panayiotis Tsaparas},
  title     = {Finding authorities and hubs from link structures on the
               World Wide Web},
  year      = {2001},
  pages     = {415--429},
  booktitle = {Proceedings of the tenth international conference on World
               Wide Web}
}

@InProceedings{ boyan:96:rl-web,
  author    = {J. Boyan and D. Freitag and T. Joachims},
  title     = {A Machine Learning Architecture for Optimizing Web Search
               Engines},
  year      = {1996},
  booktitle = {Proceedings of the AAAI Workshop on Internet-Based
               Information Systems}
}

@Proceedings{ bray:00:xml,
  editor    = {T. Bray and J. Paoli and C. M. Sperberg-McQueen and E. Maler},
  title     = {Extensible Markup Language (XML) 1.0},
  month     = oct,
  year      = {2000},
  url       = {http://www.w3.org/TR/REC-xml},
  publisher = {W3 Consortium Recommendation},
  note      = {Available from \url{http://www.w3.org/TR/REC-xml}}
}

@InProceedings{ bray:96:measuring,
  author    = {Tim Bray},
  title     = {Measuring the Web},
  year      = {1996},
  booktitle = {Proceedings of the fifth international conference on World
               Wide Web}
}

@InProceedings{ brewington:00:dynamic,
  author    = {Brian Brewington and George Cybenko},
  title     = {How dynamic is the web?},
  booktitle = {Proceedings of the Ninth International World Wide Web
               Conference},
  copyright = {the authors},
  year      = {2000},
  url       = {http://agent.cs.dartmouth.edu/papers/brewington:dynamic.ps.Z},
  earlier   = {brewington:jdynamic},
  keyword   = {WWW, scalability, information retrieval},
  group     = {agents, actcomm},
  publisher = {International World Wide Web Conference Committee (IW3C2)},
  month     = may,
  abstract  = {Recent experiments and analysis suggest that there are about
               800 million publicly-indexable web pages. However, unlike
               books in a traditional library, web pages continue to change
               even after they are initially published by their authors and
               indexed by search engines. This paper describes preliminary
               data on and statistical analysis of the frequency and nature
               of web page modifications. Using empirical models and a novel
               analytic metric of "up-to-dateness", we estimate the rate at
               which web search engines must re-index the web to remain
               current.}
}

@Article{ brewington:00:keepup, journal = {IEEE Computer}, year = {2000}, url = {http://www.computer.org/computer/co2000/r5052abs.htm}, title = {Keeping up with the changing web}, month = {May}, copyright = {IEEE}, number = {5}, keyword = {WWW, scalability, information retrieval}, group = {agents, actcomm, cmc}, pages = {52--58}, publisher = {IEEE Computer Society Press}, author = {Brian Brewington and George Cybenko}, abstract = {Because information depreciates over time, keeping Web pages current presents new design challenges. 
This article quantifies what "current" means for Web search engines and estimates how often they must reindex the Web to keep current with its changing pages and structure. \par Most information--from a newspaper story to a temperature sensor measurement to a Web page--is dynamic. When monitoring an information source, when do our previous observations become stale and need refreshing? How can we schedule these refresh operations to satisfy a required level of currency without violating resource constraints--such as band-width or computing limitations on how much data can be observed in a given time? \par The authors investigate the trade-offs involved in monitoring dynamic information sources and discuss the Web in detail, estimating how fast documents change and exploring what constitutes a "current" Web index. For a simple class of Web-monitoring systems--search engines--they combine their idea of currency with actual measured data to estimate revisit rates.}, volume = {33} }

@InProceedings{ brin:98:google,
  author    = {S. Brin and L. Page},
  title     = {The Anatomy of a Large-Scale Hypertextual Web Search Engine},
  year      = {1998},
  pages     = {107--117},
  booktitle = {Seventh International World Wide Web Conference}
}

@InProceedings{ broder:00:classifyconnect,
  author    = {Andrei Z. Broder and Robert Krauthgamer and Michael
               Mitzenmacher},
  title     = {Improved classification via connectivity information},
  booktitle = {Proceedings of the eleventh annual ACM-SIAM symposium on
               Discrete algorithms},
  location  = {San Francisco, California, United States},
  year      = {2000},
  publisher = {ACM Press},
  annote    = {Related to distillation of Bharat and Bayes nets of Silva },
  isbn      = {0-89871-453-2},
  pages     = {576--585},
  doi       = {10.1145/338219.338610}
}

% Authors must be separated by " and "; the previous comma-separated list
% made BibTeX reject the name. The stray "#" and trailing comma in the title
% were removed as well.
@InProceedings{ bruno:02:topk,
  author    = {N. Bruno and L. Gravano and A. Marian},
  title     = {Evaluating {Top-K} Queries over Web-Accessible Databases},
  year      = {2002},
  booktitle = {Proc. of the 18th IEEE International Conference on Data
               Engineering}
}

@Article{ csq:jensen:lauritzen:ea:90:bayesian-updating,
  author  = {F. V. Jensen and S. L. Lauritzen and K. G. Olesen},
  title   = {Bayesian updating in recursive graphical models by local
             computations},
  journal = {Comput. Stat. Quarterly},
  volume  = {4},
  year    = {1990},
  pages   = {269--282}
}

@Book{ calculus:book,
  author    = {Tom M. Apostol},
  title     = {Calculus, Vol I and II.},
  year      = {1969},
  publisher = {John Wiley \& Sons}
}

@InProceedings{ chakrabarti:02:accelerated,
  author    = {S. Chakrabarti and K. Punera and M. Subramanyam},
  title     = {Accelerated focused crawling through online relevance
               feedback},
  booktitle = {Proceedings of the eleventh international conference on World
               Wide Web},
  location  = {Honolulu, Hawaii, USA},
  year      = {2002},
  doi       = {10.1145/511446.511466},
  isbn      = {1-58113-449-5},
  local-url = {/Users/paolo/Documents/Research/Downloads/Internet book papers/Almaiden/Chakrabarti Accelerated Focused WWW 2002.pdf},
  pages     = {148--159},
  publisher = {ACM Press}
}

% A second, byte-identical copy of the following entry used to sit directly
% after it and triggered a "repeated entry" error; only one copy is kept.
@InProceedings{ chakrabarti:02:communities,
  author    = {Soumen Chakrabarti and Mukul M. Joshi and Kunal Punera and
               David M. Pennock},
  title     = {The structure of broad topics on the web},
  booktitle = {Proceedings of the eleventh international conference on World
               Wide Web},
  location  = {Honolulu, Hawaii, USA},
  year      = {2002},
  publisher = {ACM Press},
  isbn      = {1-58113-449-5},
  pages     = {251--262},
  doi       = {10.1145/511446.511480}
}

@InProceedings{ chakrabarti:98:hypertextcategorization,
  author    = {S. Chakrabarti and B. Dom and P. Indyk},
  title     = {Enhanced hypertext categorization using hyperlinks},
  booktitle = {Proceedings of the 1998 ACM SIGMOD international conference
               on Management of data},
  year      = {1998},
  isbn      = {0-89791-995-5},
  pages     = {307--318},
  location  = {Seattle, Washington, United States},
  doi       = {10.1145/276304.276332},
  publisher = {ACM Press}
}

@InProceedings{ chakrabarti:99:focused,
  author    = {S. Chakrabarti and M. van den Berg and B. Dom},
  title     = {Focused crawling: A new approach to topic-specific Web
               resource discovery},
  booktitle = {Proceedings of the Eighth International World Wide Web
               Conference},
  month     = may,
  year      = {1999},
  address   = {Toronto},
  local-url = {/Users/paolo/Documents/Research/InternetBook/Internet book papers/Almaiden/Chakrabarti focused.pdf}
}

@TechReport{ cho:00:estimating,
  author      = {J. Cho and H. Garcia-Molina},
  title       = {Estimating Frequency of Change},
  year        = {2000},
  number      = {DBPUBS-4 (available via http://dbpubs.stanford.edu/pub/2000-4)},
  local-url   = {/Users/paolo/Documents/Research/InternetBook/Internet book papers/Garcia Molina/Cho Estimating frequency change (TR 2002).pdf},
  institution = {Stanford University}
}

@InProceedings{ cho:00:synchronizing,
  author    = {J. Cho and H. Garcia-Molina},
  title     = {Synchronizing a database to Improve Freshness},
  year      = {2000},
  local-url = {/Users/paolo/Documents/Research/Downloads/Internet book papers/Garcia Molina/Cho Synchrosize freshness SIGMOD 2000.pdf},
  booktitle = {Proceedings of 2000 ACM International Conference on
               Management of Data (SIGMOD)}
}

@InProceedings{ cho:02:change, year = {2002}, url = {http://rose.cs.ucla.edu/~cho/papers/cho-sampling.pdf}, title = {Effective Change Detection Using Sampling}, local-url = {/Users/paolo/Documents/Research/Downloads/Internet book papers/Garcia Molina/Cho Sampling VLDB 2002.pdf}, booktitle = {Proceedings of 28th International Conference on Very Large Databases (VLDB)}, author = {J. Cho and A. 
Ntoulas} }

@InProceedings{ cho:02:parallel,
  author    = {J. Cho and H. Garcia-Molina},
  title     = {Parallel Crawlers},
  year      = {2002},
  address   = {Honolulu, Hawaii},
  local-url = {/Users/paolo/Documents/Research/Downloads/Internet book papers/Garcia Molina/Parallel Crawlers www 2002.pdf},
  booktitle = {Proceedings of the 11th World Wide Web conference (WWW11)}
}

@InProceedings{ cho:98:ordering,
  author    = {J. Cho and H. Garcia-Molina and L. Page},
  title     = {Efficient crawling through {URL} ordering},
  year      = {1998},
  local-url = {/Users/paolo/Documents/Research/InternetBook/Internet book papers/Garcia Molina/Cho URL Ordering WWW7 1998.pdf},
  booktitle = {Proceedings of the Seventh International World Wide Web
               Conference [WWW7]}
}

@InProceedings{ cohen:02:improving,
  author    = {W. Cohen},
  title     = {Improving A Page Classifier with Anchor Extraction and Link
               Analysis},
  booktitle = {Proceedings of Neural Information Processing Systems},
  year      = 2003
}

@InProceedings{ cohen:95:textrelational,
  author    = {William W. Cohen},
  title     = {Text categorization and relational learning},
  booktitle = {Proceedings of ICML-95, 12th International Conference on
               Machine Learning},
  editor    = {Armand Prieditis and Stuart J. Russell},
  address   = {Lake Tahoe, US},
  year      = {1995},
  pages     = {124--132},
  publisher = {Morgan Kaufmann Publishers, San Francisco, US},
  url       = {http://www.research.whizbang.com/~wcohen/postscript/ml-95-ir.ps},
  abstract  = {We evaluate the first order learning system FOIL on a series
               of text categorization problems. It is shown that FOIL usually
               forms classifiers with lower error rates and higher rates of
               precision and recall with a relational encoding than with a
               propositional encoding. We show that FOIL's performance can be
               improved by relation selection, a first order analog of
               feature selection. Relation selection improves FOIL's
               performance as measured by any of recall, precision,
               F-measure, or error rate. With an appropriate level of
               relation selection, FOIL appears to be competitive with or
               superior to existing propositional techniques.}
}

@InProceedings{ cohen:96:email,
  author       = {W. W. Cohen},
  title        = {Learning Rules that Classify {E-mail}},
  booktitle    = {AAAI Spring Symposium on Machine Learning in Information
                  Access},
  editor       = {M. Hearst and H. Hirsh},
  series       = {1996 Spring Symposium Series},
  year         = {1996},
  organization = {American Association for Artificial Intelligence},
  affiliation  = {AT\&T Laboratories},
  url          = {http://www.parc.xerox.com/istl/projects/mlia/mlia-papers.shtml}
}

@InCollection{ cohen:96:ilp,
  author    = {W. W. Cohen},
  title     = {Learning to Classify {English} Text with {ILP} Methods},
  editor    = {L. De Raedt},
  booktitle = {Advances in {Inductive} {Logic} {Programming}},
  pages     = {124--143},
  year      = {1996},
  publisher = {IOS Press}
}

@InProceedings{ cohen:98:whirl, author = "William W. Cohen and Haym Hirsh", title = "Joins that generalize: text classification using {{\sc Whirl}}", booktitle = "Proceedings of KDD-98, 4th International Conference on Knowledge Discovery and Data Mining", editor = "Rakesh Agrawal and Paul E. Stolorz and Gregory Piatetsky-Shapiro", publisher = "AAAI Press, Menlo Park, US", year = "1998", address = "New York, US", pages = "169--173", url = "http://www.research.whizbang.com/~wcohen/postscript/kdd-98.ps" , abstract = "WHIRL is an extension of relational databases that can perform ``soft joins'' based on the similarity of textual identifiers; these soft joins extend the traditional operation of joining tables based on the equivalence of atomic values. This paper evaluates WHIRL on a number of inductive classification tasks using data from the World Wide Web. We show that although WHIRL is designed for more general similarity-based reasoning tasks, it is competitive with mature inductive classification systems on these classification tasks. 
In particular, WHIRL generally achieves lower generalization error than C4.5, RIPPER, and several nearest-neighbor methods. WHIRL is also fast-up to 500 times faster than C4.5 on some benchmark problems. We also show that WHIRL can be efficiently used to select from a large pool of unlabeled items those that can be classified correctly with high confidence." }

@Article{ cohen:99:ripper,
  author    = {W. W. Cohen and Y. Singer},
  title     = {Context-sensitive learning methods for text categorization},
  journal   = {ACM Transactions on Information Systems (TOIS)},
  volume    = {17},
  number    = {2},
  year      = {1999},
  issn      = {1046-8188},
  pages     = {141--173},
  doi       = {10.1145/306686.306688},
  publisher = {ACM Press}
}

@InProceedings{ cohn:00:authoritative,
  author    = {David Cohn and Huan Chang},
  title     = {Learning to Probabilistically Identify Authoritative
               Documents},
  year      = {2000},
  pages     = {167--174},
  booktitle = {Proc. 17th International Conf. on Machine Learning},
  publisher = {Morgan Kaufmann, San Francisco, CA}
}

@InProceedings{ cohn:01:missinglink,
  author    = {D. Cohn and T. Hofmann},
  title     = {The Missing Link --- {A} Probabilistic Model of Document
               Content and Hypertext Connectivity},
  year      = 2001,
  url       = {http://www.cs.cmu.edu/~{ }cohn/papers/nips00.pdf, http://nips.djvuzone.org/djvu/nips13/CohnHofmann.djvu},
  address   = {Boston, MA},
  editor    = {T. K. Leen and T. G. Dietterich and V. Tresp},
  booktitle = {Advances in Neural Information Processing Systems},
  publisher = {MIT Press}
}

@InProceedings{ cooper:91:misnomers,
  author    = {William S. Cooper},
  title     = {Some inconsistencies and misnomers in probabilistic
               information retrieval},
  booktitle = {Proceedings of the 14th annual international ACM SIGIR
               conference on Research and development in information
               retrieval},
  year      = {1991},
  isbn      = {0-89791-448-1},
  pages     = {57--61},
  location  = {Chicago, Illinois, United States},
  doi       = {10.1145/122860.122866},
  publisher = {ACM Press}
}

@Book{ cormen:01:algorithms,
  author    = {T. H. Cormen and C. E. Leiserson and R. L. Rivest and C.
               Stein},
  title     = {Introduction to Algorithms},
  year      = {2001},
  address   = {Cambridge, MA},
  edition   = {Second},
  publisher = {MIT Press}
}

% NOTE(review): only the range dash was corrected below; the page numbers
% themselves (1--25) look like preprint numbering and should be checked
% against the journal issue.
@Article{ cortes:95:svm,
  author  = {C. Cortes and V. Vapnik},
  title   = {Support Vector Networks},
  journal = {Machine Learning},
  year    = {1995},
  volume  = {20},
  pages   = {1--25}
}

@Article{ cover:67:knn,
  author  = {T. M. Cover and P. E. Hart},
  title   = {Nearest neighbor pattern classification},
  journal = {IEEE Transactions on Information Theory},
  volume  = {13},
  pages   = {21--27},
  year    = {1967}
}

@InProceedings{ crammer:00:ecoc,
  author    = {K. Crammer and Y. Singer},
  title     = {On the Learnability and Design of Output Codes for Multiclass
               Problems},
  booktitle = {Computational Learning Theory},
  pages     = {35--46},
  year      = {2000}
}

@InProceedings{ craswell:01:anchors,
  author    = {Nick Craswell and David Hawking and Stephen Robertson},
  title     = {Effective site finding using link anchor information},
  booktitle = {Proceedings of the 24th annual international ACM SIGIR
               conference on Research and development in information
               retrieval},
  location  = {New Orleans, Louisiana, United States},
  year      = {2001},
  publisher = {ACM Press},
  isbn      = {1-58113-331-6},
  pages     = {250--257},
  doi       = {10.1145/383952.383999},
  abstract  = { Link-based ranking methods have been described in the
               literature and applied in commercial Web search engines.
               However, according to recent TREC experiments, they are no
               better than traditional content-based methods. We conduct a
               different type of experiment, in which the task is to find the
               main entry point of a specific Web site. In our experiments,
               ranking based on link anchor text is twice as effective as
               ranking based on document content, even though both methods
               used the same BM25 formula. We obtained these results using
               two sets of 100 queries on a 18.5 million document set and
               another set of 100 on a 0.4 million document set. This site
               finding effectiveness begins to explain why many search
               engines have adopted link methods. It also opens a rich new
               area for effectiveness improvement, where traditional methods
               fail. }
}

% Author spellings (DiPasquo, Nigam) are aligned with the conference version
% of this work, craven:98:extract, which appears further down in this file.
@Article{ craven:00:construct,
  author       = {M. Craven and D. {DiPasquo} and D. Freitag and A.
                  {McCallum} and T. Mitchell and K. Nigam and S. Slattery},
  title        = {Learning to Construct Knowledge Bases from the World Wide
                  Web},
  journal      = {Artificial Intelligence},
  topic        = {AI-and-the-internet;machine-learning; computational-ontology;intelligent-information-retrieval;},
  year         = {2000},
  number       = {1--2},
  pages        = {69--113},
  acontentnote = {Abstract: The World Wide Web is a vast source of
                  information accessible to computers, but understandable
                  only to humans. The goal of the research described here is
                  to automatically create a computer understandable knowledge
                  base whose content mirrors that of the World Wide Web. Such
                  a knowledge base would enable much more effective retrieval
                  of Web information, and promote new uses of the Web to
                  support knowledge-based inference and problem solving. Our
                  approach is to develop a trainable information extraction
                  system that takes two inputs. The first is an ontology that
                  defines the classes (e.g., company, person, employee,
                  product) and relations (e.g., employed_by, produced_by) of
                  interest when creating the knowledge base. The second is a
                  set of training data consisting of labeled regions of
                  hypertext that represent instances of these classes and
                  relations. Given these inputs, the system learns to extract
                  information from other pages and hyperlinks on the Web.
                  This article describes our general approach, several
                  machine learning algorithms for this task, and promising
                  initial results with a prototype system that has created a
                  knowledge base describing university people, courses, and
                  research projects.},
  volume       = {118}
}

@Article{ craven:01:invention,
  author   = {Mark Craven and Sean Slattery},
  title    = {Relational Learning with Statistical Predicate Invention:
              Better Models for Hypertext},
  journal  = {Machine Learning},
  issn     = {0885-6125},
  volume   = {43},
  number   = {1/2},
  pages    = {97--119},
  month    = apr,
  year     = {2001},
  abstract = {We present a new approach to learning hypertext classifiers
              that combines a statistical text-learning method with a
              relational rule learner. This approach is well suited to
              learning in hypertext domains because its statistical component
              allows it to characterize text in terms of word frequencies,
              whereas its relational component is able to describe how
              neighboring documents are related to each other by hyperlinks
              that connect them. We evaluate our approach by applying it to
              tasks that involve learning definitions for (i) classes of
              pages, (ii) particular relations that exist between pairs of
              pages, and (iii) locating a particular class of information in
              the internal structure of pages. Our experiments demonstrate
              that this new approach is able to learn more accurate
              classifiers than either of its constituent methods alone.},
  pubtype  = {11}
}

% The note previously cited the key "Craven00"; the extended journal version
% in this file is craven:00:construct, so the note now cites that key.
@InProceedings{ craven:98:extract,
  author    = {M. Craven and D. {DiPasquo} and D. Freitag and A. K.
               {McCallum} and T. M. Mitchell and K. Nigam and S. Slattery},
  title     = {Learning to extract symbolic knowledge from the {World Wide
               Web}},
  booktitle = {Proceedings of AAAI-98, 15th Conference of the American
               Association for Artificial Intelligence},
  publisher = {AAAI Press, Menlo Park, US},
  year      = {1998},
  pages     = {509--516},
  address   = {Madison, US},
  note      = {An extended version appears as~\cite{craven:00:construct}},
  url       = {http://www.cs.cmu.edu/afs/cs.cmu.edu/project/theo-11/www/wwkb/overview-aaai98.ps.gz},
  abstract  = {The World Wide Web is a vast source of information accessible
               to computers, but understandable only to humans. The goal of
               the research described here is to automatically create a
               computer understandable world wide knowledge base whose
               content mirrors that of the World Wide Web. Such a knowledge
               base would enable much more effective retrieval of Web
               information, and promote new uses of the Web to support
               knowledge-based inference and problem solving. Our approach is
               to develop a trainable information extraction system that
               takes two inputs: an ontology defining the classes and
               relations of interest, and a set of training data consisting
               of labeled regions of hypertext representing instances of
               these classes and relations. Given these inputs, the system
               learns to extract information from other pages and hyperlinks
               on the Web. This paper describes our general approach, several
               machine learning algorithms for this task, and promising
               initial results with a prototype system.}
}

@InProceedings{ craven:98:foil,
  author    = {M. Craven and S. Slattery and K. Nigam},
  title     = {First-Order Learning for Web Mining},
  pages     = {250--255},
  isbn      = {3-540-64417-2},
  editor    = {C. N{\'e}dellec and C. Rouveirol},
  booktitle = {Proceedings of the 10th European Conference on Machine
               Learning ({ECML}-98)},
  month     = apr # "~21--23",
  series    = {LNAI},
  volume    = {1398},
  publisher = {Springer},
  address   = {Berlin},
  year      = {1998}
}

@Book{ cristianini:00:svm, title = {An Introduction to Support Vector Machines}, author = {N. Cristianini and J. 
Shawe-Taylor},
  publisher = {Cambridge University Press},
  year      = {2000}
}

@InProceedings{ davison:00:locality,
  author    = {Brian D. Davison},
  title     = {Topical Locality in the Web},
  booktitle = {Proceedings of the 23rd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
  series    = {WWW Information Retrieval},
  pages     = {272--279},
  year      = {2000},
  url       = {http://www.acm.org/pubs/articles/proceedings/ir/345508/p272-davison/p272-davison.pdf},
  mrnumber  = {C.IR.2000.272},
  copyright = {(c) Copyright 2000 ACM}
}

@InProceedings{ davison:00:nepotistic,
  author    = {B. D. Davison},
  title     = {Recognizing Nepotistic Links on the Web},
  booktitle = {AAAI Workshop on Artificial Intelligence for Web Search},
  year      = {2000}
}

@Article{ day:83:isoosi,
  author  = {J. D. Day and H. Zimmermann},
  title   = {The {OSI} Reference Model},
  journal = {Proceedings of the IEEE},
  volume  = {71},
  number  = {12},
  pages   = {1334--1340},
  year    = {1983}
}

% The doi field holds the bare DOI; styles add the resolver prefix themselves.
@Article{ day:95:osi,
  author    = {John Day},
  title     = {The (un)revised {OSI} reference model},
  journal   = {ACM SIGCOMM Computer Communication Review},
  volume    = {25},
  number    = {5},
  pages     = {39--55},
  year      = {1995},
  issn      = {0146-4833},
  doi       = {10.1145/216701.216704},
  publisher = {ACM Press}
}

@InProceedings{ debra:94:fish,
  author    = {P. {De Bra} and R. Post},
  title     = {Information retrieval in the {World Wide Web}: Making client-based searching feasible},
  booktitle = {Proceedings of the First International World Wide Web Conference},
  year      = 1994
}

@Misc{ deering:98:ipv6,
  author = {S. Deering and R. Hinden},
  title  = {{Internet Protocol, Version 6 (IPv6) Specification}},
  year   = {1998},
  url    = {http://www.ietf.org/rfc/rfc2460.txt},
  note   = {RFC 2460. Available from \url{http://www.ietf.org/rfc/rfc2460.txt}}
}

@Book{ delbimbo:99:visual,
  author    = {A. {Del Bimbo}},
  title     = {Visual Information Retrieval},
  publisher = {Morgan Kaufmann},
  address   = {San Francisco, California},
  year      = {1999}
}

@Article{ dietterich:95:ecoc,
  author = {T. G.
Dietterich and G. Bakiri}, title = {Solving Multiclass Learning Problems via Error-Correcting Output Codes}, journal = {Journal of Artificial Intelligence Research}, year = {1995}, volume = {2}, pages = {263--286} } @InProceedings{ diligenti:02:linkanalysis, location = {Honolulu, Hawaii, USA}, year = {2002}, publisher = {ACM Press}, title = {Web page scoring systems for horizontal and vertical search}, isbn = {1-58113-449-5}, pages = {508--516}, booktitle = {Proceedings of the eleventh international conference on World Wide Web}, doi = {http://doi.acm.org/10.1145/511446.511512}, author = {Michelangelo Diligenti and Marco Gori and Marco Maggini} } @InProceedings{ diligenti:2000:focused, year = {2000}, url = {http://www.vldb.org/dblp/db/conf/vldb/DiligentiCLGG00.html} , title = {Focused Crawling Using Context Graphs}, editor = {Amr {El Abbadi} and Michael L. Brodie and Sharma Chakravarthy and Umeshwar Dayal and Nabil Kamel and Gunter Schlageter and Kyu-Young Whang}, address = {Los Altos, CA 94022, USA}, authorurl = {http://www.vldb.org/dblp/db/indices/a-tree/d/Diligenti:Michelangelo.html; http://www.vldb.org/dblp/db/indices/a-tree/c/Coetzee:Frans.html; http://www.vldb.org/dblp/db/indices/a-tree/l/Lawrence:Steve.html; http://www.vldb.org/dblp/db/indices/a-tree/g/Giles:C=_Lee.html; http://www.vldb.org/dblp/db/indices/a-tree/g/Gori:Marco.html} , isbn = {1-55860-715-3}, bibdate = {Fri Jan 12 07:50:24 MST 2001}, pages = {527--534}, booktitle = {{VLDB} 2000, Proceedings of 26th International Conference on Very Large Data Bases, September 10--14, 2000, Cairo, Egypt}, publisher = {Morgan Kaufmann Publishers}, author = {Michelangelo Diligenti and Frans Coetzee and Steve Lawrence and C. 
Lee Giles and Marco Gori} } @Article{ domingos:97:naive, author = "Pedro Domingos and Michael Pazzani", title = "On the optimality of the simple {B}ayesian classifier under zero-one loss", journal = "Machine Learning", volume = "29", year = "1997", pages = "103--130", publisher = "Kluwer Academic Publishers, Boston" } @Article{ dreilinger:97:metasearch, author = "Daniel Dreilinger and Adele E. Howe", title = "Experiences with Selecting Search Engines using Metasearch", journal = "ACM Transactions on Information Systems", volume = "15", number = "3", pages = "195--222", year = "1997", copyright = "(c) Copyright 1997 ACM", keywords = "Algorithms, Experimentation, Information retrieval, Machine learning, Search engine, WWW, H.3.3 Information storage and retrieval, Information search and retrieval, H.3.4 Information storage and retrieval, Systems and software", mrnumber = "J.TOIS.15.3.195", url = "http://www.acm.org/pubs/articles/journals/tois/1997-15-3/p195-dreilinger/p195-dreilinger.pdf" , abstract = "Search engines are among the most useful and high-profile resources on the Internet. The problem of finding information on the Internet has been replaced with the problem of knowing where search engines are, what they are designed to retrieve, and how to use them. This article describes and evaluates SavvySearch, a metasearch engine designed to intelligently select and interface with multiple remote search engines. The primary metasearch issue examined is the importance of carefully selecting and ranking remote search engines for user queries. We studied the efficacy of SavvySearch's incrementally acquired metaindex approach to selecting search engines by analyzing the effect of time and experience on performance. We also compared the metaindex approach to the simpler categorical approach and showed how much experience is required to surpass the simple scheme." } @Article{ drucker:99:spam, author = "H. Drucker and V. Vapnik and D. 
Wu", title = "Support vector machines for spam categorization", journal = "IEEE Transactions on Neural Networks", year = "1999", number = "5", volume = "10", pages = "1048--1054", url = "http://www.monmouth.edu/~drucker/SVM_spam_article_compete.PDF" , abstract = "We study the use of Support Vector Machines (SVMs) in classifying email as spam or nonspam by comparing it to three other classification algorithms: Ripper, Rocchio, and boosting decision trees. These four algorithms were tested on two different data sets: one data set where the number of features were constrained to the 1000 best features and another data set where the dimensionality was over 7000. SVMs performed best when using binary features. For both data sets, boosting trees and SVMs had acceptable test performance in terms of accuracy and speed. However, SVMs had significantly less training time." } @Book{ duda:73:pattern, author = {R. O. Duda and P. E. Hart}, title = {Pattern Classification and Scene Analysis}, publisher = {John Wiley and Sons}, address = {New York}, year = {1973} } @InProceedings{ dumais:98:svm, author = {S. Dumais and J. Platt and D. Heckerman and M. Sahami}, title = {Inductive learning algorithms and representations for text categorization}, booktitle = {Proceedings of the seventh international conference on Information and knowledge management}, year = {1998}, isbn = {1-58113-061-9}, pages = {148--155}, location = {Bethesda, Maryland, United States}, doi = {http://doi.acm.org/10.1145/288627.288651}, publisher = {ACM Press} } @InProceedings{ edwards:01:adaptive, year = {2001}, url = {http://www.www10.org/cdrom/papers/pdf/p210.pdf}, title = {An Adaptive Model for Optimizing Performance of an Incremental Web Crawler}, pages = {106--113}, booktitle = {Proceedings of the Tenth International World Wide Web Conference}, author = {Jenny Edwards and Kevin McCurley and John Tomlin} } @Article{ elias:75:gammacodes, author = {P. 
Elias},
  title   = {Universal codeword sets and representations of the integers},
  journal = {IEEE Transactions on Information Theory},
  year    = 1975,
  volume  = 21,
  number  = 2,
  pages   = {194--203},
  month   = mar
}

@Article{ evgeniou:00:regularization,
  author  = {T. Evgeniou and M. Pontil and T. Poggio},
  title   = {Regularization Networks and Support Vector Machines},
  journal = {Advances in Computational Mathematics},
  year    = {2000},
  volume  = {13},
  number  = {1},
  pages   = {1--50}
}

% NOTE(review): "mitai" is a string macro that is not defined in this part of
% the file; confirm it is declared before this entry.
@TechReport{ evgeniou:99:regularization,
  author      = {T. Evgeniou and M. Pontil and T. Poggio},
  title       = {A unified framework for Regularization Networks and Support Vector Machines},
  type        = "A.I. Memo No.",
  number      = 1654,
  institution = mitai,
  year        = 1999
}

% The citation key says "01" but the paper appeared at SIGCOMM 1999; the key is
% kept unchanged so existing citations continue to resolve.
@InProceedings{ faloutsos:01:zipf,
  author    = {M. Faloutsos and P. Faloutsos and C. Faloutsos},
  title     = {On Power-Law Relationships of the {Internet} Topology},
  booktitle = {Proceedings of the ACM SIGCOMM '99 Conference on Applications, Technologies, Architectures, and Protocols for Computer Communication},
  pages     = {251--262},
  year      = {1999},
  abstract  = {Despite the apparent randomness of the Internet, we discover some surprisingly simple power-laws of the Internet topology. These power-laws hold for three snapshots of the Internet, between November 1997 and December 1998, despite a 45\% growth of its size during that period. We show that our power-laws fit the real data very well resulting in correlation coefficients of 96\% or higher. Our observations provide a novel perspective of the structure of the Internet. The power-laws describe concisely skewed distributions of graph properties such as the node outdegree. In addition, these power-laws can be used to estimate important parameters such as the average neighborhood size, and facilitate the design and the performance analysis of protocols.
Furthermore, we can use them to generate and select realistic topologies for simulation purposes.}
}

% The doi fields below hold the bare DOI; styles add the resolver prefix themselves.
@Article{ faloutsos:84:signatures,
  author    = {Chris Faloutsos and Stavros Christodoulakis},
  title     = {Signature files: an access method for documents and its analytical performance evaluation},
  journal   = {ACM Transactions on Information Systems (TOIS)},
  volume    = {2},
  number    = {4},
  year      = {1984},
  issn      = {1046-8188},
  pages     = {267--288},
  doi       = {10.1145/2275.357411},
  publisher = {ACM Press}
}

@Misc{ fielding:99:http11,
  author = {R. Fielding and J. Gettys and J. Mogul and H. Frystyk and L. Masinter and P. Leach and T. Berners-Lee},
  title  = {{Hypertext Transfer Protocol -- HTTP/1.1}},
  year   = {1999},
  url    = {http://www.ietf.org/rfc/rfc2616.txt},
  note   = {RFC 2616. Available from \url{http://www.ietf.org/rfc/rfc2616.txt}}
}

@Article{ fienberg:99:bayesian,
  author  = {S. E. Fienberg and M. A. Johnson and B. J. Junker},
  title   = {Classical multilevel and {Bayesian} approaches to population size estimation using multiple lists},
  journal = {Journal of the Royal Statistical Society, Series A},
  year    = 1999,
  volume  = 162,
  pages   = {383--406}
}

@InProceedings{ flake:00:communities,
  author    = {Gary William Flake and Steve Lawrence and C. Lee Giles},
  title     = {Efficient identification of Web communities},
  booktitle = {Proceedings of the sixth ACM SIGKDD international conference on Knowledge discovery and data mining},
  pages     = {150--160},
  year      = {2000},
  publisher = {ACM Press},
  location  = {Boston, Massachusetts, United States},
  isbn      = {1-58113-233-6},
  doi       = {10.1145/347090.347121}
}

@Article{ flake:02:self-organization,
  journal   = {IEEE Computer},
  year      = {2002},
  title     = {Self-Organization of the Web and Identification of Communities},
  number    = {3},
  local-url = {/Users/paolo/Documents/Research/Downloads/Internet book papers/Lawrence/Self organization web communities Computer 2002.pdf},
  pages     = {66--71},
  author    = {G. W. Flake and S. Lawrence and C. L.
Giles and F. Coetzee},
  volume = {35}
}

@InCollection{ fox:92:stoplists,
  author    = {C. Fox},
  title     = {Lexical analysis and stoplists},
  booktitle = {Information Retrieval: Data Structures and Algorithms},
  editor    = {W. B. Frakes and R. Baeza-Yates},
  chapter   = 7,
  publisher = {Prentice Hall},
  year      = 1992
}

@InProceedings{ freund:96:adaboost,
  author    = {Y. Freund and R. E. Schapire},
  title     = {Experiments with a new Boosting algorithm},
  booktitle = {Proc. 13th International Conference on Machine Learning},
  publisher = {Morgan Kaufmann},
  year      = {1996},
  pages     = {148--156}
}

@InProceedings{ friedman:96:local,
  author    = {N. Friedman and M. Goldszmidt},
  title     = {Learning {Bayesian} Networks with Local Structure},
  editor    = {E. Horvitz and F. Jensen},
  booktitle = {Proc. 12th Conf. on Uncertainty in Artificial Intelligence},
  year      = {1996},
  address   = {Portland, Oregon}
}

@Article{ friedman:97:bnclassifiers,
  author  = {Nir Friedman and Dan Geiger and Moises Goldszmidt},
  title   = {{Bayesian} Network Classifiers},
  journal = {Machine Learning},
  volume  = {29},
  year    = {1997},
  pages   = {131--163}
}

@InProceedings{ friedman:99:relational,
  author    = {N. Friedman and L. Getoor and D. Koller and A. Pfeffer},
  title     = {Learning Probabilistic Relational Models},
  pages     = {1300--1309},
  editor    = {Thomas Dean},
  booktitle = {Proceedings of the 16th International Joint Conference on Artificial Intelligence ({IJCAI}-99-Vol2)},
  month     = jul # "~31--" # aug # "~6",
  publisher = {Morgan Kaufmann Publishers},
  address   = {S.F.},
  year      = {1999}
}

% Author is written in "First Last" form: a comma after the given name would
% make BibTeX treat "Mark" as the surname.
@Article{ frisse:88:hypertext,
  author = {Mark E.
Frisse},
  title     = {Searching for information in a hypertext medical handbook},
  journal   = {Communications of the ACM},
  volume    = {31},
  number    = {7},
  year      = {1988},
  issn      = {0001-0782},
  pages     = {880--886},
  doi       = {10.1145/48511.48518},
  publisher = {ACM Press}
}

@Article{ fuhr:92,
  author  = {N. Fuhr},
  title   = {Probabilistic Models in Information Retrieval},
  journal = {The Computer Journal},
  year    = {1992},
  volume  = {35},
  number  = {3},
  pages   = {243--255}
}

% The ampersand in booktitle is escaped; a bare one is a LaTeX alignment-tab
% error when the bibliography is typeset.
@InProceedings{ furnas:88:lsa,
  author    = {G. W. Furnas and S. Deerwester and S. T. Dumais and T. K. Landauer and R. A. Harshman and L. A. Streeter and K. E. Lochbaum},
  title     = {Information retrieval using a singular value decomposition model of latent semantic structure},
  booktitle = {Proceedings of the eleventh international conference on Research \& development in information retrieval},
  pages     = {465--480},
  year      = {1988},
  publisher = {ACM Press},
  location  = {Grenoble, France},
  isbn      = {2-7061-0309-4},
  doi       = {10.1145/62437.62487},
  local-url = {/Users/paolo/Documents/Research/InternetBook/Internet book papers/IR/Furnas etal Latent Semantics Analysis SIGIR 1988.pdf}
}

@Article{ garfield:55,
  author  = {E. Garfield},
  title   = {Citation Indexes for Science -- A New Dimension in Documentation Through Association of Ideas},
  journal = {Science},
  volume  = {122},
  pages   = {108--111},
  year    = {1955}
}

@Article{ garfield:72,
  author  = {E. Garfield},
  title   = {Citation analysis as a tool in journal evaluation},
  journal = {Science},
  volume  = {178},
  pages   = {471--479},
  year    = {1972}
}

@Booklet{ garner:67,
  author    = {R. Garner},
  title     = {A computer oriented, graph theoretic analysis of Citation Index structures},
  year      = {1967},
  publisher = {Drexel University Press},
  address   = {Philadelphia}
}

@InProceedings{ getoor:01:relational,
  author    = "L. Getoor and N. Friedman and D. Koller and B. Taskar",
  title     = "Learning Probabilistic Models of Relational Structure",
  booktitle = "Proc. 18th International Conf.
on Machine Learning",
  publisher = "Morgan Kaufmann, San Francisco, CA",
  year      = "2001",
  pages     = "170--177"
}

@InProceedings{ ghani:00:ecocintext,
  author    = {R. Ghani},
  title     = {Using Error-Correcting Codes for Text Classification},
  booktitle = {Proc. 17th International Conference on Machine Learning},
  year      = 2000
}

% Single definition of this key; it keeps the full author names.
@InProceedings{ gibson:98:communities,
  location  = {Pittsburgh, Pennsylvania, United States},
  year      = {1998},
  publisher = {ACM Press},
  title     = {Inferring Web communities from link topology},
  isbn      = {0-89791-972-6},
  pages     = {225--234},
  booktitle = {Proceedings of the ninth ACM conference on Hypertext and hypermedia : links, objects, time and space---structure in hypermedia systems},
  doi       = {10.1145/276627.276652},
  author    = {David Gibson and Jon Kleinberg and Prabhakar
Raghavan} } @Article{ gravano:1999:gioss, keywords = {digital libraries; distributed information retrieval; Internet search and retrieval; text databases}, journal = {ACM Transactions on Database Systems}, year = {1999}, url = {http://www.acm.org/pubs/citations/journals/tods/1999-24-2/p229-gravano/} , title = {{GlOSS}: text-source discovery over the {Internet}}, generalterms = {Measurement; Performance}, coden = {ATDSD3}, number = {2}, issn = {0362-5915}, bibdate = {Tue Sep 26 08:44:02 MDT 2000}, pages = {229--264}, subject = {Information Systems --- Information Storage and Retrieval --- General (H.3.0); Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3); Information Systems --- Information Storage and Retrieval --- Digital Libraries (H.3.7); Information Systems --- Database Management --- Systems (H.2.4): {\bf Textual databases}; Information Systems --- Information Storage and Retrieval (H.3)}, author = {Luis Gravano and H{\'e}ctor Garc{\'\i}a-Molina and Anthony Tomasic}, abstract = {The dramatic growth of the Internet has created a new problem for users: location of the relevant sources of documents. This article presents a framework for (and experimentally analyzes a solution to) this problem, which we call the {\em text-source discovery problem\/}. Our approach consists of two phases. First, each text source exports its contents to a centralized service. Second, users present queries to the service, which returns an ordered list of promising text sources. This article describes {\em GlOSS\/}, Glossary of Servers Server, with two versions: {\em bGlOSS\/}, which provides a Boolean query retrieval model, and {\em vGlOSS\/}, which provides a vector-space retrieval model. We also present {\em hGlOSS\/}, which provides a decentralized version of the system. 
We extensively describe the methodology for measuring the retrieval effectiveness of these systems and provide experimental evidence, based on actual data, that all three systems are highly effective in determining promising text sources for a given query.},
  volume = {24}
}

@InProceedings{ guermeur:00:multiclass,
  author       = {Y. Guermeur and A. Elisseeff and H. Paugam-Moisy},
  title        = {A new multi-class {SVM} based on a uniform convergence result},
  booktitle    = {Proceedings of IJCNN - International Joint Conference on Neural Networks},
  year         = {2000},
  organization = {IEEE}
}

% The citation key says "01" but the year field (and the SIGMOD'97 workshop in
% booktitle) say 1997; the key is kept so existing citations still resolve.
@InProceedings{ hammer:01:extracting,
  author    = {J. Hammer and H. Garcia-Molina and J. Cho and A. Crespo and R. Aranha},
  title     = {Extracting Semistructured Information from the Web},
  booktitle = {Proceedings of the Workshop on Management of Semistructured Data held in conjunction with ACM SIGMOD'97},
  year      = {1997},
  local-url = {/Users/paolo/Documents/Research/Downloads/Internet book papers/Garcia Molina/Hammer Extracting Semistructured SIGMOD 1997.pdf}
}

@InProceedings{ han:01:knn,
  author    = "E. H. Han and G. Karypis and V. Kumar",
  title     = "Text Categorization Using Weight-Adjusted $k$-Nearest Neighbor Classification",
  booktitle = "Proceedings of PAKDD-01, 5th Pacific-Asia Conference on Knowledge Discovery and Data Mining",
  editor    = "David Cheung and Qing Li and Graham Williams",
  year      = "2001",
  publisher = "Springer Verlag, Heidelberg, DE",
  address   = "Hong Kong, CN",
  note      = "Published in the ``Lecture Notes in Computer Science'' series, number 2035",
  pages     = "53--65",
  url       = "http://link.springer.de/link/service/series/0558/papers/2035/20350053.pdf"
}

% A book chapter, so it is an InCollection with booktitle and editor, matching
% the sibling chapter entry fox:92:stoplists from the same book.
@InCollection{ harman:92:invertedfiles,
  author    = {Donna Harman and R. Baeza-Yates and Edward Fox and W.
Lee},
  title     = {Inverted files},
  booktitle = {Information Retrieval: Data Structures and Algorithms},
  editor    = {W. B. Frakes and R. Baeza-Yates},
  year      = {1992},
  isbn      = {0-13-463837-9},
  pages     = {28--43},
  publisher = {Prentice-Hall, Inc.}
}

@InProceedings{ haveliwala:02:pagerank,
  author    = {Taher H. Haveliwala},
  title     = {Topic-Sensitive {PageRank}},
  booktitle = {Proceedings of the Eleventh International World Wide Web Conference},
  year      = {2002}
}

@TechReport{ haveliwala:99:efficient,
  author      = {T. H. Haveliwala},
  title       = {Efficient computation of {PageRank}},
  institution = {Stanford University Database Group},
  note        = {Available at \url{http://dbpubs.stanford.edu/pub/1999-31}},
  year        = {1999}
}

@Book{ heaps:78,
  author    = {H. S. Heaps},
  year      = {1978},
  title     = {Information Retrieval--Computational and Theoretical Aspects},
  publisher = {Academic Press, Inc.},
  address   = {New York, NY}
}

@InProceedings{ hersovici:98:shark,
  author    = {M. Hersovici and M. Jacovi and Y. S. Maarek and D. Pelleg and M. Shtalhaim and S. Ur},
  title     = {The shark-search algorithm --- An application: Tailored Web site mapping},
  booktitle = {Proceedings 7th International World-Wide Web Conference},
  year      = 1998
}

@TechReport{ heydon:01:highperformance,
  author      = {A. Heydon and M. Najork},
  title       = {High-performance web crawling},
  number      = {SRC 173},
  institution = {Compaq Systems Research Center},
  month       = sep,
  year        = {2001}
}

@Article{ heydon:99:mercator,
  author  = {Allan Heydon and Marc Najork},
  title   = {Mercator: {A} Scalable, Extensible {W}eb Crawler},
  journal = {World Wide Web},
  volume  = {2},
  number  = {4},
  pages   = {219--229},
  year    = {1999},
  note    = {Available from \url{http://research.compaq.com/SRC/mercator/research.html}}
}

@InProceedings{ hirai:00:webbase,
  keywords = {webbase, value filtering},
  year     = {2000},
  url      = {http://dbpubs.stanford.edu/pub/2000-51},
  title    = {WebBase: {A} repository of web pages},
  author   = {J. Hirai and S. Raghavan and H. Garcia-Molina and A.
Paepcke}, comments = {In this paper, we study the problem of constructing and maintaining a large shared repository of web pages. We discuss the unique characteristics of such a repository, propose an architecture, and identify its functional modules. We focus on the storage manager module, and illustrate how traditional techniques for storage and indexing can be tailored to meet the requirements of a web repository. To evaluate design alternatives, we also present experimental results from a prototype repository called WebBase, that is currently being developed at Stanford University. Keywords : Repository, WebBase, Architecture, Storage management}, pages = {277--293}, booktitle = {Proceedings of the Ninth International World-Wide Web Conference}, entered-by = {Andreas Paepcke}, note = {Available at http://dbpubs.stanford.edu/pub/2000-51} } @InProceedings{ hofmann:00:web, author = "Thomas Hofmann", title = "Learning probabilistic models of the Web.", pages = "369--371", editor = "Nicholas Belkin and Peter Ingwersen and Mun-Kwe Leong", booktitle = "Proceedings of the 23rd Annual International Conference on Research and Development in Information Retrieval ({SIGIR}-00)", series = "ACM SIGIR", volume = "special issue, v. 34", publisher = "ACM Press", address = "N.Y.", year = "2000" } @Article{ hofmann:01:plsa, author = "Thomas Hofmann", title = "Unsupervised Learning by Probabilistic Latent Semantic Analysis", journal = "Machine Learning", volume = "42", number = "1/2", year = "2001", pages = "177--196", publisher = "Kluwer Academic Publishers, Boston" } @InProceedings{ hofmann:99:aspect, author = "Thomas Hofmann and Jan Puzicha and Michael I Jordan", title = "Learning from dyadic data", booktitle = "Advances in Neural Information Processing Systems 11: Proceedings of the 1998 Conference", year = "1999", editor = "Michael S. Kearns and Sara A. 
Solla and David A. Cohn",
  publisher = "MIT Press",
  address   = "Cambridge, Massachusetts",
  isbn      = "0-262-11245-0",
  pages     = "466--472"
}

@Article{ huberman:1998:strong,
  author    = {B. A. Huberman and P. L. T. Pirolli and J. E. Pitkow and R. M. Lukose},
  title     = {Strong Regularities in {World Wide Web} Surfing},
  journal   = {Science},
  volume    = {280},
  number    = {5360},
  pages     = {95--97},
  year      = {1998},
  local-url = {/Users/paolo/Documents/Research/InternetBook/Internet book papers/Huberman/Strong Regularities Science 1998.pdf}
}

@Book{ iso:8879:sgml,
  author    = {{ISO 8879}},
  title     = {Information processing --- Text and Office Systems --- Standard Generalized Markup Language ({SGML})},
  edition   = {First},
  publisher = {{International Organization for Standardization}},
  address   = {Geneva, Switzerland},
  year      = {1986}
}

@InProceedings{ ipeirotis:01:persival,
  author    = {Panagiotis G. Ipeirotis and Luis Gravano and Mehran Sahami},
  title     = {{PERSIVAL} Demo: Categorizing Hidden-Web Resources},
  booktitle = {JCDL'01: Proceedings of the 1st ACM/IEEE-CS Joint Conference on Digital Libraries},
  series    = {Demonstrations},
  pages     = {454},
  year      = {2001},
  url       = {http://www.acm.org/pubs/articles/proceedings/dl/379437/p454-ipeirotis/p454-ipeirotis.pdf},
  mrnumber  = {C.DL.01.454},
  copyright = {(c) Copyright 2001 ACM}
}

% The series name is stored in "series" (InProceedings has no journal field).
% NOTE(review): the end page is still the placeholder "??" -- fill in once verified.
@InProceedings{ ipeirotis:2001:act,
  author    = {Panagiotis G. Ipeirotis and Luis Gravano and Mehran Sahami},
  title     = {Automatic Classification of Text Databases through Query Probing},
  booktitle = {Proceedings of the ACM SIGMOD Workshop on the Web and Databases (WebDB'00), LNCS Series no. 1997},
  series    = {Lecture Notes in Computer Science},
  volume    = {1997},
  pages     = {245--??},
  year      = {2001},
  url       = {http://link.springer-ny.com/link/service/series/0558/bibs/1997/19970245.htm; http://link.springer-ny.com/link/service/series/0558/papers/1997/19970245.pdf},
  coden     = {LNCSD9},
  issn      = {0302-9743},
  bibdate   = {Sat Feb 2 13:03:16 MST 2002}
}

@InProceedings{ ittner:95:ocr,
  author = "D. J. Ittner and D. D.
Lewis and D. D. Ahn", title = "Text categorization of low quality images", booktitle = "Proceedings of SDAIR-95, 4th Annual Symposium on Document Analysis and Information Retrieval", year = "1995", address = "Las Vegas, US", pages = "301--315", url = "http://www.research.att.com/~lewis/papers/ittner95.ps", abstract = "Categorization of text images into content-oriented classes would be a useful capability in a variety of document handling systems. Many methods can be used to categorize texts once their words are known, but OCR can garble a large proportion of words, particularly when low quality images are used. Despite this, we show for one data set that fax quality images can be categorized with nearly the same accuracy as the original text. Further, the categorization system can be trained on noisy OCR output, without need for the true text of any image, or for editing of OCR output. The use of a vector space classifier and training method robust to large feature sets, combined with discarding of low frequency OCR output strings are the key to our approach." } @InProceedings{ jaeger:97:rbn, author = "Manfred Jaeger", title = "Relational {Bayesian} Networks", pages = "266--273", isbn = "1-55860-485-5", editor = "Dan Geiger and Prakash Pundalik Shenoy", booktitle = "Proceedings of the 13th Conference on Uncertainty in Artificial Intelligence ({UAI}-97)", month = aug # "~1--3", publisher = "Morgan Kaufmann Publishers", address = "San Francisco", year = "1997" } @InProceedings{ joachims99, author = {T.~Joachims}, title = {Making Large--Scale {SVM} Learning Practical}, publisher = {{MIT} Press}, year = {1999}, pages = {169--184}, booktitle = {Advances in Kernel Methods --- Support Vector Learning}, editor = {B. Sch{\"o}lkopf and C. J. C.~Burges and A. J.~Smola}, address = {Cambridge, MA} } @Book{ joachims:02:svm, title = {Learning to Classify Text using Support Vector Machines}, author = {T. 
Joachims},
  publisher = {Kluwer},
  year      = {2002}
}

@InProceedings{ joachims:97:rocchio,
  author    = {T. Joachims},
  title     = {A Probabilistic Analysis of the {Rocchio} Algorithm with {TFIDF} for Text Categorization},
  booktitle = {Proceedings of International Conference on Machine Learning},
  year      = 1997
}

@InProceedings{ joachims:98:many,
  author    = {T. Joachims},
  title     = {Text Categorization with Support Vector Machines: Learning with Many Relevant Features},
  booktitle = {Proceedings of the European Conference on Machine Learning},
  year      = 1998,
  publisher = {Springer}
}

@InProceedings{ joachims:99:transductive,
  author    = {T. Joachims},
  title     = {Transductive Inference for Text Classification using Support Vector Machines},
  booktitle = {Proc. International Conference on Machine Learning (ICML)},
  year      = 1999
}

@Article{ kautz:97:hidden,
  author  = {H. Kautz and B. Selman and M. Shah},
  title   = {The Hidden {Web}},
  journal = {AI Magazine},
  volume  = 18,
  number  = 2,
  pages   = {27--36},
  year    = {1997}
}

@Article{ kessler:63:coupling,
  author  = {M. M. Kessler},
  title   = {Bibliographic coupling between scientific papers},
  journal = {American Documentation},
  volume  = {14},
  pages   = {10--25},
  year    = {1963}
}

@InProceedings{ kira:92:featureselection,
  author    = {K. Kira and L. A. Rendell},
  title     = {A practical approach to feature selection},
  booktitle = {Proceedings of the Ninth International Conference on Machine Learning},
  pages     = {249--256},
  year      = 1992,
  publisher = {Morgan Kaufmann}
}

@InCollection{ kittler:86:feature,
  author    = {J. Kittler},
  title     = {Feature selection and extraction},
  booktitle = {Handbook of Pattern Recognition and Image Processing},
  publisher = {Academic Press},
  year      = 1986,
  editor    = {T. Y. Young and K. S.
Fu},
  chapter = 3
}

@Article{ kleinberg:01:structure,
  author    = {Jon Kleinberg and Steve Lawrence},
  title     = {The Structure of the Web},
  journal   = {Science},
  volume    = {294},
  number    = {5548},
  pages     = {1849--1850},
  year      = {2001},
  local-url = {/Users/paolo/Documents/Research/InternetBook/Internet book papers/Lawrence/Structure of the web Sicence 2001.pdf}
}

% The doi fields below hold the bare DOI; styles add the resolver prefix themselves.
@InProceedings{ kleinberg:98:authoritative,
  author    = {Jon M. Kleinberg},
  title     = {Authoritative sources in a hyperlinked environment},
  booktitle = {Proceedings of the ninth annual ACM-SIAM symposium on Discrete algorithms},
  pages     = {668--677},
  year      = {1998},
  publisher = {ACM Press},
  location  = {San Francisco, California, United States},
  isbn      = {0-89871-410-9},
  doi       = {10.1145/314613.315045},
  note      = {A preliminary version of this paper appeared as IBM Research Report RJ 10076, May 1997}
}

@Article{ kleinberg:99:authoritative,
  author  = {J. Kleinberg},
  title   = {Authoritative sources in a hyperlinked environment},
  journal = {Journal of the ACM},
  volume  = {46},
  number  = {5},
  pages   = {604--632},
  year    = {1999}
}

@Article{ kleinberg:99:survey,
  author    = {Jon M. Kleinberg},
  title     = {Hubs, authorities, and communities},
  journal   = {ACM Computing Surveys (CSUR)},
  volume    = {31},
  number    = {4es},
  pages     = {5},
  year      = {1999},
  issn      = {0360-0300},
  doi       = {10.1145/345966.345982},
  publisher = {ACM Press}
}

% The citation key says "01" but the year field says 2000; the key is kept so
% existing citations still resolve.
@Article{ kobayashi:01:ir-web,
  author    = {M. Kobayashi and K. Takeda},
  title     = {Information Retrieval on the Web},
  journal   = {ACM Computing Surveys},
  volume    = {32},
  number    = {2},
  pages     = {144--173},
  month     = jun,
  year      = {2000},
  local-url = {/Users/paolo/Documents/Research/InternetBook/Internet book papers/Kobayashi Survey IR web ACMCS 2000.pdf}
}

@Article{ kohavi:97:wrappers,
  author  = {R. Kohavi and G. John},
  title   = {Wrappers for feature subset selection},
  journal = {Artificial Intelligence},
  volume  = {97},
  number  = {1--2},
  pages   = {273--324},
  year    = 1997
}

@InProceedings{ koller:96:optimalfs,
  author = {D. Koller and M.
Sahami}, title = {Toward Optimal Feature Selection}, booktitle = {Proceedings of the 13th International Conference on Machine Learning}, pages = {284--292}, year = 1996 } @InProceedings{ koller:97:hierarchically, author = "D. Koller and M. Sahami", title = "Hierarchically Classifying Documents Using Very Few Words", booktitle = "Proceedings of the Fourteenth International Conference on Machine Learning (ICML-97)", year = "1997", url = "http://dbpubs.stanford.edu/pub/1997-75", keywords = "categorization, clustering" } @Article{ koster:95:robots, journal = {ConneXions}, year = {1995}, url = {http://www.robotstxt.org/wc/threat-or-treat.html}, title = {Robots in the Web: threat or treat?}, month = {April}, number = {4}, author = {M. Koster}, abstract = {Robots have been operating in the World-Wide Web for over a year. In that time they have performed useful tasks, but also on occasion wreaked havoc on the networks. This paper investigates the advantages and disadvantages of robots, with an emphasis on robots used for resource discovery. New alternative resource discovery strategies are discussed and compared. It concludes that while current robots will be useful in the immediate future, they will become less effective and more problematic as the Web grows. }, volume = {9} } @InProceedings{ kraaij:02:entrypages, location = {Tampere, Finland}, year = {2002}, publisher = {ACM Press}, title = {The Importance of Prior Probabilities for Entry Page Search}, isbn = {1-58113-561-0}, pages = {27--34}, booktitle = {Proceeding of the twenty-fifth annual international conference on Research and development in information retrieval}, doi = {http://doi.acm.org/10.1145/564376.564383}, author = {Wessel Kraaij and Thijs Westerveld and Djoerd Hiemstra}, abstract = { An important class of searches on the world-wide-web has the goal to find an entry page (homepage) of an organisation. Entry page search is quite different from Ad Hoc search. 
Indeed a plain Ad Hoc system performs disappointingly. We explored three non-content features of web pages: page length, number of incoming links and URL form. Especially the URL form proved to be a good predictor. Using URL form priors we found over 70\% of all entry pages at rank 1, and up to 89\% in the top 10. Non-content features can easily be embedded in a language model framework as a prior probability. }
}

@InProceedings{ krishnamurthy:99:http1.1,
  year = {1999},
  title = {Key Differences Between {HTTP}/1.0 and {HTTP}/1.1},
  author = {Balachander Krishnamurthy and Jeffrey C. Mogul and David M. Kristol},
  comments = {The HTTP/1.1 protocol is the result of four years of discussion and debate among a broad group of Web researchers and developers. It improves upon its phenomenally successful predecessor, HTTP/1.0, in numerous ways. We discuss the differences between HTTP/1.0 and HTTP/1.1, as well as some of the rationale behind these changes.},
  booktitle = {Proceedings of the Eighth International World-Wide Web Conference},
  entered-by = {Rebecca Wesley}
}

@InProceedings{ kruger:00:deadliner,
  author = "Andries Kruger and C. Lee Giles and Frans Coetzee and Eric J. Glover and Gary William Flake and Steve Lawrence and Christian W. Omlin",
  title = "{DEADLINER}: Building a New Niche Search Engine",
  pages = "272--281",
  editor = "Arvin Agah and Jamie Callan and Elke Rundensteiner",
  booktitle = "Proceedings of the 2000 {ACM} {CIKM} International Conference on Information and Knowledge Management ({CIKM}-00)",
  month = nov # "~6--11",
  publisher = "ACM Press",
  address = "N.Y.",
  year = "2000"
}

@InProceedings{ lam:98:instanceset,
  author = "W. Lam and C. Y. Ho",
  title = "Using a generalized instance set for automatic text categorization",
  booktitle = "Proceedings of SIGIR-98, 21st ACM International Conference on Research and Development in Information Retrieval",
  editor = "W. Bruce Croft and Alistair Moffat and Cornelis J.
van Rijsbergen and Ross Wilkinson and Justin Zobel",
  publisher = "ACM Press, New York, US",
  year = "1998",
  address = "Melbourne, AU",
  pages = "81--89",
  url = "http://www.acm.org/pubs/articles/proceedings/ir/290941/p81-lam/p81-lam.pdf"
}

@InProceedings{ lang:95:newsweeder,
  author = {K. Lang},
  title = {{NewsWeeder}: Learning to filter netnews},
  booktitle = {Proceedings of the 12th International Conference on Machine Learning},
  pages = {331--339},
  year = 1995
}

@InProceedings{ langley:94:selection,
  author = {P. Langley},
  title = {Selection of relevant features in machine learning},
  booktitle = {Proceedings of the AAAI Fall Symposium on Relevance},
  pages = {140--144},
  year = 1994
}

@Article{ lawrence:01:online,
  author = "Steve Lawrence",
  title = "Online or Invisible?",
  journal = "Nature",
  volume = "411",
  number = "6837",
  pages = "521",
  year = "2001"
}

@Article{ lawrence:98:inquirus,
  author = {S. Lawrence and C. L. Giles},
  title = {Context and page analysis for improved {Web} search},
  journal = {IEEE Internet Computing},
  year = 1998,
  volume = 2,
  number = 4,
  pages = {38--46}
}

@Article{ lawrence:98:searching,
  journal = {Science},
  year = {1998},
  title = {Searching the World Wide Web},
  local-url = {/Users/paolo/Documents/Research/Downloads/Internet book papers/NECI/search Science 1998.pdf},
  pages = {98--100},
  author = {Steve Lawrence and C. Lee Giles},
  volume = {280}
}

@Article{ lawrence:99:accessibility,
  journal = {Nature},
  year = {1999},
  title = {Accessibility of Information on the Web},
  number = {6740},
  pages = {107--109},
  author = {Steve Lawrence and C. Lee Giles},
  volume = {400}
}

@Article{ lawrence:99:citeseer,
  author = "S. Lawrence and C. L. Giles and K. Bollacker",
  title = "Digital libraries and autonomous citation indexing",
  journal = "IEEE Computer",
  year = 1999,
  volume = "32",
  number = "6",
  pages = "67--71"
}

@InProceedings{ lawrence:99:indexing,
  author = {S. Lawrence and K. Bollacker and C. L.
Giles},
  title = {Indexing and retrieval of Scientific Literature},
  booktitle = {Eighth International Conference on Information and Knowledge Management},
  pages = {139--146},
  year = 1999
}

@Article{ lempel:01:salsa,
  journal = {ACM Transactions on Information Systems (TOIS)},
  year = {2001},
  publisher = {ACM Press},
  title = {{SALSA}: the stochastic approach for link-structure analysis},
  number = {2},
  issn = {1046-8188},
  pages = {131--160},
  doi = {http://doi.acm.org/10.1145/382979.383041},
  author = {R. Lempel and S. Moran},
  abstract = { Today, when searching for information on the WWW, one usually performs a query through a term-based search engine. These engines return, as the query's result, a list of Web pages whose contents matches the query. For broad-topic queries, such searches often result in a huge set of retrieved documents, many of which are irrelevant to the user. However, much information is contained in the link-structure of the WWW. Information such as which pages are linked to others can be used to augment search algorithms. In this context, Jon Kleinberg introduced the notion of two distinct types of Web pages: hubs and authorities. Kleinberg argued that hubs and authorities exhibit a mutually reinforcing relationship: a good hub will point to many authorities, and a good authority will be pointed at by many hubs. In light of this, he devised an algorithm aimed at finding authoritative pages. We present SALSA, a new stochastic approach for link-structure analysis, which examines random walks on graphs derived from the link-structure. We show that both SALSA and Kleinberg's Mutual Reinforcement approach employ the same metaalgorithm. We then prove that SALSA is equivalent to a weighted in degree analysis of the link-structure of WWW subgraphs, making it computationally more efficient than the Mutual reinforcement approach. We compare the results of applying SALSA to the results derived through Kleinberg's approach.
These comparisons reveal a topological phenomenon called the TKC effect which, in certain cases, prevents the Mutual reinforcement approach from identifying meaningful authorities. },
  volume = {19}
}

@Article{ levene:01:zipf,
  author = {M. Levene and J. Borges and G. Loizou},
  title = {{Zipf's} Law for Web Surfers},
  journal = {Knowledge and Information Systems},
  year = {2001},
  volume = {3},
  number = {1},
  pages = {120--129},
  local-url = {/Users/paolo/Documents/Research/Downloads/Internet book papers/Levene Borges Loizon - Zipf's web surfers.pdf},
  abstract = {One of the main activities of web users, known as surfing, is to follow links. Lengthy navigation often leads to disorientation when users lose track of the context in which they are navigating and are unsure how to proceed in terms of the goal of their original query. Studying navigation patterns of web users is thus important, since it can lead us to a better understanding of the problems users face when they are surfing. We derive Zipf's rank frequency law (i.e. an inverse power law) from an absorbing Markov chain model of surfers' behaviour assuming that less probable navigation trails are, on average, longer than more probable ones. In our model the probability of a trail is interpreted as the relevance (or value) of the trail. We apply our model to two scenarios: in the first the probability of a user terminating the navigation session is independent of the number of links he has followed so far, and in the second the probability of a user terminating the navigation session increases by a constant each time the user follows a link. We analyse these scenarios using two sets of experimental data sets showing that, although the first scenario is only a rough approximation of surfers' behaviour, the data is consistent with the second scenario and can thus provide an explanation of surfers' behaviour.}
}

@InProceedings{ lewis:92:naivebayes,
  author = {David D.
Lewis},
  title = {An evaluation of phrasal and clustered representations on a text categorization task},
  booktitle = {Proceedings of the 15th annual international ACM SIGIR conference on Research and development in information retrieval},
  year = {1992},
  isbn = {0-89791-523-2},
  pages = {37--50},
  location = {Copenhagen, Denmark},
  doi = {http://doi.acm.org/10.1145/133160.133172},
  publisher = {ACM Press}
}

@InProceedings{ lewis:94:comparison,
  author = {D. D. Lewis and M. Ringuette},
  title = {Comparison of two learning algorithms for text categorization},
  booktitle = {Proc. 3rd Annual Symposium on Document Analysis and Information Retrieval},
  year = {1994}
}

@InProceedings{ lewis:94:heterogeneous,
  author = "D. D. Lewis and J. Catlett",
  title = "Heterogeneous uncertainty sampling for supervised learning",
  booktitle = "Proceedings of ICML-94, 11th International Conference on Machine Learning",
  editor = "W. W. Cohen and H. Hirsh",
  year = 1994,
  address = "New Brunswick, US",
  pages = "148--156",
  publisher = "Morgan Kaufmann Publishers, San Francisco, US",
  url = "http://www.research.att.com/~lewis/papers/lewis94e.ps",
  abstract = "Uncertainty sampling methods iteratively request class labels for training instances whose classes are uncertain despite the previous labeled instances. These methods can greatly reduce the number of instances that an expert need label. One problem with this approach is that the classifier best suited for an application may be too expensive to train or use during the selection of instances. We test the use of one classifier (a highly efficient probabilistic one) to select examples for training another (the C4.5 rule induction program). Despite being chosen by this heterogeneous approach, the uncertainty samples yielded classifiers with lower error rates than random samples ten times larger."
}

@InProceedings{ lewis:94:sequential,
  author = {David D. Lewis and William A.
Gale},
  title = {A sequential algorithm for training text classifiers},
  booktitle = {Proceedings of the 17th annual international ACM SIGIR conference on Research and development in information retrieval},
  year = {1994},
  isbn = {0-387-19889-X},
  pages = {3--12},
  location = {Dublin, Ireland},
  publisher = {Springer-Verlag New York, Inc.}
}

@Manual{ lewis:97:reuters,
  title = {Reuters-21578 text categorization test collection},
  author = {D. D. Lewis},
  month = sep,
  year = 1997,
  note = {Documentation and data available at {http://www.daviddlewis.com/resources/testcollections/reuters21578/}}
}

@InProceedings{ lewis:98:atforty,
  author = {D. D. Lewis},
  title = {Naive {Bayes} at Forty: The Independence Assumption in Information Retrieval},
  booktitle = {Proceedings of the 10th European Conference on Machine Learning},
  pages = {4--15},
  year = 1998
}

@InProceedings{ li:2002:improvinghits,
  location = {Honolulu, Hawaii, USA},
  year = {2002},
  publisher = {ACM Press},
  title = {Improvement of {HITS}-based algorithms on web documents},
  isbn = {1-58113-449-5},
  pages = {527--535},
  booktitle = {Proceedings of the eleventh international conference on World Wide Web},
  doi = {http://doi.acm.org/10.1145/511446.511514},
  author = {Longzhuang Li and Yi Shang and Wei Zhang},
  abstract = { In this paper, we present two ways to improve the precision of HITS-based algorithms on Web documents. First, by analyzing the limitations of current HITS-based algorithms, we propose a new weighted HITS-based method that assigns appropriate weights to in-links of root documents. Then, we combine content analysis with HITS-based algorithms and study the effects of four representative relevance scoring methods, VSM, Okapi, TLS, and CDR, using a set of broad topic queries. Our experimental results show that our weighted HITS-based method performs significantly better than Bharat's improved HITS algorithm.
When we combine our weighted HITS-based method or Bharat's HITS algorithm with any of the four relevance scoring methods, the combined methods are only marginally better than our weighted HITS-based method. Between the four relevance-scoring methods, there is no significant quality difference when they are combined with a HITS-based algorithm. }
}

@InProceedings{ lieberman:95:letizia,
  author    = {Henry Lieberman},
  title     = {Letizia: An Agent That Assists Web Browsing},
  booktitle = {Proceedings of the Fourteenth International Joint Conference on Artificial Intelligence (IJCAI-95)},
  editor    = {Chris S. Mellish},
  pages     = {924--929},
  month     = aug,
  year      = {1995},
  address   = {Montreal, Quebec, Canada},
  publisher = {Morgan Kaufmann publishers Inc.: San Mateo, CA, USA},
  isbn      = {1-55860-363-8},
  url       = {http://lcs.www.media.mit.edu/people/lieber/Lieberary/Letizia/Letizia-AAAI/Letizia.ps},
  abstract  = {Letizia is a user interface agent that assists a user browsing the World Wide Web. As the user operates a conventional Web browser such as Netscape, the agent tracks user behavior and attempts to anticipate items of interest by doing concurrent, autonomous exploration of links from the user's current position. The agent automates a browsing strategy consisting of a best-first search augmented by heuristics inferring user interest from browsing behavior.}
}

@Book{ liu:98:featureselection,
  author    = {H. Liu and H. Motoda},
  title     = {Feature Selection for Knowledge Discovery and Data Mining},
  publisher = {Kluwer Academic Publishers},
  year      = 1998
}

@Article{ lovins:68:stemming,
  author  = {J. B. Lovins},
  title   = {Development of a stemming algorithm},
  journal = {Mechanical Translation and Computational Linguistics},
  volume  = 11,
  number  = {1-2},
  pages   = {22--31},
  year    = 1968
}

@Article{ ml:ghahramani:jordan:97,
  author = {Zoubin Ghahramani and Michael I.
Jordan},
  title = {Factorial Hidden {Markov} Models},
  journal = {Machine Learning},
  year = {1997},
  volume = {29},
  pages = {245--274}
}

@InProceedings{ manber:90:suffixarrays,
  author = {Udi Manber and Gene Myers},
  title = {Suffix arrays: a new method for on-line string searches},
  booktitle = {Proceedings of the first annual ACM-SIAM symposium on Discrete algorithms},
  year = {1990},
  isbn = {0-89871-251-3},
  pages = {319--327},
  location = {San Francisco, California, United States},
  publisher = {Society for Industrial and Applied Mathematics}
}

@InProceedings{ marchiori:97:hyper,
  year = {1997},
  title = {The quest for correct information on the Web: Hyper search engines},
  month = apr,
  address = {Santa Clara, CA},
  pages = {265--276},
  booktitle = {Proceedings of the Sixth International World-Wide Web Conference},
  author = {M. Marchiori}
}

@Article{ maron:61,
  author = {M. E. Maron},
  title = {Automatic Indexing: An Experimental Inquiry},
  journal = {Journal of the ACM (JACM)},
  volume = {8},
  number = {3},
  year = {1961},
  issn = {0004-5411},
  pages = {404--417},
  doi = {http://doi.acm.org/10.1145/321075.321084},
  publisher = {ACM Press}
}

@Article{ maron:77,
  author = {M. Maron},
  title = {On indexing, retrieval and the meaning of about},
  journal = {Journal of the American Society for Information Science},
  year = {1977},
  volume = {28}
}

@Article{ maron:82,
  author = {M.
Maron},
  title = {Associative search techniques versus probabilistic retrieval models},
  journal = {Journal of the American Society for Information Science},
  year = {1982},
  volume = {33},
  pages = {308--310}
}

@Article{ mccallum:2000:construction,
  journal = {Information Retrieval},
  year = {2000},
  url = {http://www.wkap.nl/article.pdf?266400},
  title = {Automating the Construction of Internet Portals with Machine Learning},
  number = {2},
  issn = {1386-4564},
  pages = {127--163},
  publisher = {Kluwer Academic Publishers},
  author = {Andrew K. McCallum and Kamal Nigam and Jason Rennie and Kristie Seymore},
  abstract = {Domain-specific internet portals are growing in popularity because they gather content from the Web and organize it for easy access, retrieval and search. For example, www.campsearch.com allows complex queries by age, location, cost and specialty over summer camps. This functionality is not possible with general, Web-wide search engines. Unfortunately these portals are difficult and time-consuming to maintain. This paper advocates the use of machine learning techniques to greatly automate the creation and maintenance of domain-specific Internet portals. We describe new research in reinforcement learning, information extraction and text classification that enables efficient spidering, the identification of informative text segments, and the population of topic hierarchies. Using these techniques, we have built a demonstration system: a portal for computer science research papers. It already contains over 50,000 papers and is publicly available at www.cora.justresearch.com. These techniques are widely applicable to portal creation in other domains.},
  volume = {3}
}

@InProceedings{ mccallum:98:comparison,
  author = {A. McCallum and K.
Nigam},
  title = {A comparison of event models for naive {Bayes} text classification},
  booktitle = {AAAI/ICML-98 Workshop on Learning for Text Categorization},
  pages = {41--48},
  publisher = {AAAI Press},
  year = {1998}
}

@Book{ mcclelland:86:pdb,
  author = {James L. McClelland and David E. Rumelhart},
  address = {Cambridge, MA},
  publisher = {MIT Press},
  title = {Parallel Distributed Processing: {Explorations} in the Microstructure of Cognition},
  year = {1986}
}

@Article{ melnik:01:distributed,
  journal = {ACM Transactions on Information Systems (TOIS)},
  year = {2001},
  doi = {http://doi.acm.org/10.1145/502115.502116},
  title = {Building a distributed full-text index for the web},
  number = {3},
  issn = {1046-8188},
  pages = {217--241},
  publisher = {ACM Press},
  author = {S. Melnik and S. Raghavan and B. Yang and H. Garcia-Molina},
  volume = {19}
}

@InProceedings{ melnik:01:distributed-www,
  keywords = {Text-index Web WebBase},
  year = {2001},
  title = {Building a Distributed Full-Text Index for the Web},
  author = {S. Melnik and S. Raghavan and B. Yang and H. Garcia-Molina},
  booktitle = {Proceedings of the Tenth International World-Wide Web Conference},
  entered-by = {Sriram Raghavan}
}

@Article{ menczer:00:agents,
  journal = {Machine Learning},
  year = {2000},
  title = {Adaptive Retrieval Agents: Internalizing Local Context and Scaling up to the Web},
  number = {2/3},
  pages = {203--242},
  publisher = {Kluwer Academic Publishers, Boston},
  author = {Filippo Menczer and Richard K. Belew},
  volume = {39}
}

@InProceedings{ menczer:01:evaluating,
  year = {2001},
  title = {Evaluating {Topic-Driven} Web Crawlers},
  editor = {W. Bruce Croft and David J. Harper and Donald H.
Kraft and Justin Zobel},
  month = sep # "~9--13",
  address = {New York},
  pages = {241--249},
  booktitle = {Proceedings of the 24th Annual International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval ({SIGIR}-01)},
  publisher = {ACM Press},
  author = {Filippo Menczer and Gautam Pant and Padmini Srinivasan and Miguel Ruiz}
}

@InProceedings{ menczer:97:arachnid,
  year = {1997},
  url = {http://dollar.biz.uiowa.edu/~fil/Papers/ICML.ps},
  title = {{ARACHNID}: Adaptive Retrieval Agents Choosing Heuristic Neighborhoods for Information Discovery},
  pages = {227--235},
  booktitle = {Machine Learning: Proceedings of the Fourteenth International Conference},
  author = {Filippo Menczer}
}

@Book{ minc:73:nonnegative,
  year = {1973},
  title = {Nonnegative Matrices},
  author = {H. Minc},
  publisher = {John Wiley and Sons},
  address = {New York}
}

@Book{ mitchell:97,
  author = {T. Mitchell},
  title = {Machine Learning},
  publisher = {McGraw-Hill},
  year = {1997}
}

@Article{ moffat:96:invertedfiles,
  author = {A. Moffat and J. Zobel},
  title = {Self-indexing inverted files for fast text retrieval},
  journal = {ACM Transactions on Information Systems},
  year = 1996,
  volume = 14,
  number = 4,
  pages = {349--379}
}

@TechReport{ mogul:99:duplicates,
  year = {1999},
  title = {A trace-based analysis of duplicate suppression in {HTTP}},
  number = {99/2},
  institution = {Compaq Western Research Laboratory},
  author = {Jeffrey C. Mogul},
  note = {Available from \url{http://www.research.digital.com/wrl/techreports/abstracts/99.2.html}}
}

@Article{ mori:92:ocr,
  author = {S. Mori and C. Suen and K. Yamamoto},
  title = {Historical review of {OCR} research and development},
  journal = {Proceedings of the IEEE},
  year = 1992,
  volume = 80,
  pages = {1029--1058}
}

@InProceedings{ moulinier:96:symbolic,
  author = {I. Moulinier and G. Raskinis and J.
Ganascia},
  title = {Text categorization: a symbolic approach},
  booktitle = {Proceedings of the Fifth Annual Symposium on Document Analysis and Information Retrieval},
  year = 1996
}

@InProceedings{ moura:97:indexing,
  author = "E. S. Moura and G. Navarro and N. Ziviani",
  title = "Indexing compressed text",
  booktitle = "Proceedings of the 4th South American Workshop on String Processing",
  editor = "R. Baeza-Yates",
  publisher = "Carleton University Press International Informatics Series",
  pages = "95--111",
  year = 1997
}

@InProceedings{ najork:01:bfs,
  year = {2001},
  title = {Breadth-first search crawling yields high-quality pages},
  booktitle = {Proceedings of the Tenth International World Wide Web Conference},
  author = {M. Najork and J. L. Wiener}
}

@TechReport{ neville-manning:96:prescript,
  author = {C. Neville-Manning and T. Reed},
  title = {A Postscript to Plain Text Converter},
  year = {1996},
  note = {\url{http://www.nzdl.org/html/prescript.html}}
}

@InProceedings{ ng:01:discriminativegenerative,
  author = {A. Y. Ng and M. I. Jordan},
  title = {On discriminative vs. generative classifiers: A comparison of logistic regression and naive {Bayes}},
  booktitle = {Advances in Neural Information Processing Systems 14},
  year = {2002}
}

@InProceedings{ ng:01:linkanalysis,
  location = {New Orleans, Louisiana, United States},
  year = {2001},
  publisher = {ACM Press},
  title = {Stable algorithms for link analysis},
  isbn = {1-58113-331-6},
  pages = {258--266},
  booktitle = {Proceedings of the 24th annual international ACM SIGIR conference on Research and development in information retrieval},
  doi = {http://doi.acm.org/10.1145/383952.384003},
  author = {Andrew Y. Ng and Alice X. Zheng and Michael I.
Jordan}
}

@InProceedings{ ng:01:prank,
  year = {2001},
  title = {Link Analysis, Eigenvectors and Stability},
  local-url = {/Users/paolo/Documents/Research/Downloads/Internet book papers/Ng Zheng Jordan link analysis ijcai01.pdf},
  booktitle = {Seventeenth International Joint Conference on Artificial Intelligence},
  author = {A. Y. Ng and A. X. Zheng and M. I. Jordan},
  abstract = {The HITS and the PageRank algorithms are eigenvector methods for identifying authoritative or influential articles, given hyperlink or citation information. That such algorithms should give consistent answers is surely a desideratum, and in this paper, we address the question of when they can be expected to give stable rankings under small perturbations to the hyperlink patterns. Using tools from matrix perturbation theory and Markov chain theory, we provide conditions under which these methods are stable, and give specific examples of instability when these conditions are violated.
We also briefly describe a modification to HITS that improves its stability.}
}

@InProceedings{ nigam:00:cotraining,
  author = "Kamal Nigam and Rayid Ghani",
  title = "Analyzing the Effectiveness and Applicability of Co-training",
  pages = "86--93",
  editor = "Arvin Agah and Jamie Callan and Elke Rundensteiner",
  booktitle = "Proceedings of the 2000 {ACM} {CIKM} International Conference on Information and Knowledge Management ({CIKM}-00)",
  month = nov # "~6--11",
  publisher = "ACM Press",
  address = "N.Y.",
  year = "2000"
}

@Article{ nigam:00:em,
  author = {K. Nigam and A. {McCallum} and S. Thrun and T. Mitchell},
  title = {Text Classification from Labeled and Unlabeled Documents using {EM}},
  journal = {Machine Learning},
  volume = {39},
  number = {2/3},
  year = {2000},
  pages = {103--134}
}

@Misc{ oneill:97:sampling,
  author = {E. T. O'Neill and P. D. {McClain} and B. F. Lavoie},
  title = {A methodology for sampling the {World Wide Web}},
  howpublished = {Annual Review of OCLC Research},
  year = 1997,
  note = {\url{http://www.oclc.org/research/publications/arr/1997/oneill/o\%27neillar980213.htm}}
}

@Misc{ opendirectoryproject,
  author = {{The Open Directory Project}},
  note = {\url{http://dmoz.org}}
}

@TechReport{ page:98:pagerank,
  year = {1998},
  title = {The {PageRank} citation ranking: Bringing order to the web},
  institution = {Stanford University},
  author = {L. Page and S. Brin and R. Motwani and T. Winograd},
  note = {Available at \url{http://www-db.stanford.edu/~backrub/pageranksub.ps}}
}

@InProceedings{ papineni:01:idf,
  author = {K. Papineni},
  title = {Why inverse document frequency?},
  booktitle = {Proceedings of the North American Association for Computational Linguistics},
  year = {2001},
  pages = {25--32}
}

@InProceedings{ passerini:02:multiclass,
  year = {2002},
  title = {From Margins to Probabilities in Multiclass Learning Problems},
  editor = {F. van Harmelen},
  booktitle = {Proc. 15th European Conf. on Artificial Intelligence},
  author = {A. Passerini and M. Pontil and P.
Frasconi}
}

@InProceedings{ pazzani:96:dependencies,
  author = {M. J. Pazzani},
  title = {Searching for dependencies in {Bayesian} classifiers},
  booktitle = {Proceedings of the Fifth International Workshop on Artificial Intelligence and Statistics},
  pages = {239--248},
  year = {1996},
  publisher = {Springer-Verlag}
}

@Book{ pearl:88:probabilistic-reasoning,
  author = {Judea Pearl},
  title = {Probabilistic Reasoning in Intelligent Systems: Networks of Plausible Inference},
  publisher = {Morgan Kaufmann},
  year = {1988}
}

@Book{ pearl:88:gm,
  author = {J. Pearl},
  title = {Probabilistic Reasoning in Intelligent Systems: Networks of Plausible Inference},
  publisher = {Morgan Kaufmann},
  address = {San Mateo, CA},
  year = {1988}
}

@Article{ pennock:02:winners,
  journal = {Proceedings of the National Academy of Sciences},
  year = {2002},
  title = {Winners Don't Take All: Characterizing the Competition for Links on the Web},
  number = {8},
  local-url = {/Users/paolo/Documents/Research/Downloads/Internet book papers/Lawrence/Winners dont take all pnas 2002.pdf},
  pages = {5207--5211},
  author = {D. M. Pennock and G. W. Flake and S. Lawrence and E. J. Glover and C. L. Giles},
  volume = {99}
}

@Misc{ pew:02:engines,
  month = jul,
  year = {2002},
  title = {Search Engines},
  author = {{Pew Internet Project Report}},
  note = {Available at \url{http://www.pewinternet.org/reports/toc.asp?Report=64}}
}

@InProceedings{ platt99,
  author = {J. Platt},
  title = {Fast Training of Support Vector Machines using Sequential Minimal Optimization},
  publisher = {{MIT} Press},
  year = {1999},
  pages = {185--208},
  booktitle = {Advances in Kernel Methods --- Support Vector Learning},
  editor = {B. Sch{\"o}lkopf and C. J. C.~Burges and A. J.~Smola},
  address = {Cambridge, MA}
}

@Article{ porter:80:stemming,
  author = "M.F.
Porter",
  title = "An algorithm for suffix stripping",
  journal = "Program",
  volume = "14",
  year = "1980",
  pages = "130--137"
}

@Article{ quinlan:86:decisiontrees,
  author = {J. R. Quinlan},
  year = {1986},
  title = {Induction of Decision Trees},
  journal = {Machine Learning},
  volume = {1},
  pages = {81--106},
  publisher = {Kluwer Academic Publishers, Boston}
}

@Article{ quinlan:90:foil,
  author = "J. R. Quinlan",
  title = "Learning logical definitions from relations",
  journal = "Machine Learning",
  volume = "5",
  number = "3",
  pages = "239--266",
  year = "1990",
  keywords = "rule, rules, DTree"
}

@InProceedings{ rafiei:00:reputation,
  author = {D. Rafiei and A. Mendelzon},
  title = {What is this Page Known for? Computing Web Page Reputations},
  booktitle = {Proc. of the 9th World Wide Web Conference},
  year = {2000}
}

@Proceedings{ raggett:99:html,
  month = dec,
  year = {1999},
  url = {http://www.w3.org/TR/html4/},
  title = {{HTML 4.01 Specification}},
  editor = {Dave Raggett and Arnaud Le Hors and Ian Jacobs},
  publisher = {W3 Consortium Recommendation},
  note = {Available from \url{http://www.w3.org/TR/html4/}}
}

@InProceedings{ raghavan:01:hidden,
  year = {2001},
  title = {Crawling the Hidden Web},
  address = {Orlando},
  isbn = {1-55860-804-4},
  pages = {129--138},
  booktitle = {Proceedings of the 27th International Conference on Very Large Data Bases ({VLDB} '01)},
  publisher = {Morgan Kaufmann},
  author = {S. Raghavan and H.
Garcia-Molina}
}

@InProceedings{ rennie:99:reinforcement,
  author    = {Jason Rennie and Andrew Kachites McCallum},
  title     = {Using reinforcement learning to spider the {W}eb efficiently},
  booktitle = {Proceedings of ICML-99, 16th International Conference on Machine Learning},
  editor    = {Ivan Bratko and Saso Dzeroski},
  pages     = {335--343},
  year      = {1999},
  address   = {Bled, SL},
  publisher = {Morgan Kaufmann Publishers, San Francisco, US},
  url       = {http://www.watson.org/~jrennie/papers/icml99.ps.gz},
  abstract  = {Consider the task of exploring the Web in order to find pages of a particular kind or on a particular topic. This task arises in the construction of search engines and Web knowledge bases. The paper argues that the creation of efficient Web spiders is best framed and solved by reinforcement learning, a branch of machine learning that concerns itself with optimal sequential decision making. One strength of reinforcement learning is that it provides a formalism for measuring the utility of actions that give benefit only in the future. We present an algorithm for learning a value function that maps hyperlinks to future discounted reward using a naive Bayes text classifier. Experiments on two real-world spidering tasks show a three-fold improvement in spidering efficiency over traditional breadth-first search, and up to a two-fold improvement over reinforcement learning with immediate reward only.}
}

@InProceedings{ ribeiro-neto:99:distributed,
  author    = {Berthier Ribeiro-Neto and Edleno S. Moura and Marden S. Neubert and Nivio Ziviani},
  title     = {Efficient distributed algorithms to build inverted files},
  booktitle = {Proceedings of the 22nd annual international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {105--112},
  year      = {1999},
  location  = {Berkeley, California, United States},
  publisher = {ACM Press},
  isbn      = {1-58113-096-1},
  doi       = {http://doi.acm.org/10.1145/312624.312663}
}

@Article{ robertson:76:bir,
  author = {S.E.
Robertson and K. {Sparck Jones}},
  title = {Relevance weighting of search terms},
  journal = {Journal of the American Society for Information Science},
  year = {1976},
  pages = {129--146},
  month = {May--June}
}

@Article{ robertson:77,
  author = "Stephen E. Robertson",
  title = "The probability ranking principle in {IR}",
  year = "1977",
  journal = "Journal of Documentation",
  volume = "33",
  pages = "294--304",
  note = "{Also reprinted in \cite{SparckJones:97}, pp. 281--286.}"
}

@Article{ robertson:77:prp,
  author = {S. E. Robertson},
  title = {The probability ranking principle in {IR}},
  journal = {Journal of Documentation},
  year = {1977},
  volume = {33},
  number = {4},
  pages = {294--304}
}

@InProceedings{ robertson:94:2poisson,
  author = {S. E. Robertson and S. Walker},
  title = {Some simple effective approximations to the 2-{Poisson} model for probabilistic weighted retrieval},
  booktitle = {Proceedings of the 17th annual international ACM SIGIR conference on Research and development in information retrieval},
  year = {1994},
  isbn = {0-387-19889-X},
  pages = {232--241},
  location = {Dublin, Ireland},
  publisher = {Springer-Verlag New York, Inc.}
}

@Article{ rosenblatt:58:perceptron,
  author = {F. Rosenblatt},
  title = {The perceptron: A probabilistic model for information storage and organization in the brain},
  journal = {Psychological Review},
  volume = {65},
  pages = {386--408},
  year = {1958}
}

@Book{ russell:95:ai,
  year = {1995},
  title = {Artificial Intelligence: A Modern Approach},
  publisher = {Prentice Hall},
  author = {S. Russell and P. Norvig}
}

@InProceedings{ sahami:98:junk,
  author = {M. Sahami and S. Dumais and D. Heckerman and E. Horvitz},
  title = {A {Bayesian} approach to filtering junk e-mail},
  booktitle = {AAAI-98 Workshop on Learning for Text Categorization},
  pages = {55--62},
  year = 1998
}

@Book{ salton:71:smart,
  author = {G.
Salton},
  title = {The SMART Retrieval System--Experiments in Automatic Document Processing},
  publisher = {Prentice-Hall},
  address = {Englewood Cliffs, N.J.},
  year = {1971}
}

@Article{salton:83:extended,
  author = {Gerard Salton and Edward A. Fox and Harry Wu},
  title = {Extended {Boolean} information retrieval},
  journal = {Communications of the ACM},
  volume = {26},
  number = {11},
  pages = {1022--1036},
  year = {1983},
  issn = {0001-0782},
  doi = {10.1145/182.358466},
  publisher = {ACM Press}
}

@Book{salton:83:ir,
  author = {G. Salton and M. J. McGill},
  title = {Introduction to modern information retrieval},
  publisher = {McGraw-Hill},
  address = {New York},
  year = {1983}
}

@InCollection{saul:97:aggregate,
  author = "Lawrence Saul and Fernando Pereira",
  title = "Aggregate and Mixed-Order {Markov} Models for Statistical Language Processing",
  booktitle = "Proceedings of the Second Conference on Empirical Methods in Natural Language Processing",
  editor = "Claire Cardie and Ralph Weischedel",
  publisher = "Association for Computational Linguistics",
  address = "Somerset, New Jersey",
  pages = "81--89",
  year = "1997",
  topic = "empirical-methods-in-nlp;corpus-statistics; n-gram-models;word-sequence-probabilities;"
}

% BoosTexter was written by Schapire and Singer.
@Article{schapire:00:boostexter,
  author = {R. E. Schapire and Y. Singer},
  title = {{BoosTexter}: {A} boosting-based system for text categorization},
  journal = {Machine Learning},
  volume = {39},
  number = {2/3},
  pages = {135--168},
  year = {2000}
}

@Book{schoelkopf:02:kernels,
  author = "B. Schoelkopf and A. J. Smola",
  title = "Learning with Kernels",
  publisher = "The MIT Press",
  address = "Cambridge, MA",
  year = "2002"
}

@Article{sebastiani:02:survey,
  author = {F.
Sebastiani},
  title = {Machine learning in automated text categorization},
  journal = {ACM Computing Surveys (CSUR)},
  volume = {34},
  number = {1},
  pages = {1--47},
  year = {2002},
  issn = {0360-0300},
  doi = {10.1145/505282.505283},
  publisher = {ACM Press}
}

@Book{seneta:81:nonnegative,
  author = {E. Seneta},
  title = {Nonnegative Matrices and Markov Chains},
  publisher = {Springer},
  address = {New York},
  year = {1981}
}

% The doi field holds the bare identifier; styles add the resolver prefix.
@InProceedings{silva:00:beliefnets,
  author = {Ilm{\'e}rio Silva and Berthier Ribeiro-Neto and P{\'a}vel Calado and Edleno Moura and N{\'i}vio Ziviani},
  title = {Link-based and content-based evidential information in a belief network model},
  booktitle = {Proceedings of the 23rd annual international ACM SIGIR conference on Research and development in information retrieval},
  publisher = {ACM Press},
  location = {Athens, Greece},
  pages = {96--103},
  year = {2000},
  isbn = {1-58113-226-3},
  doi = {10.1145/345508.345554},
  abstract = {This work presents an information retrieval model developed to deal with hyperlinked environments. The model is based on belief networks and provides a framework for combining information extracted from the content of the documents with information derived from cross-references among the documents. The information extracted from the content of the documents is based on statistics regarding the keywords in the collection and is one of the basis for traditional information retrieval (IR) ranking algorithms. The information derived from cross-references among the documents is based on link references in a hyperlinked environment and has received increased attention lately due to the success of the Web. We discuss a set of strategies for combining these two types of sources of evidential information and experiment with them using a reference collection extracted from the Web.
The results show that this type of combination can improve the retrieval performance without requiring any extra information from the users at query time. In our experiments, the improvements reach up to 59\% in terms of average precision figures. }
}

@Article{small:73:cocitation,
  author  = {H.G. Small},
  title   = {Co-citation in the Scientific Literature: A New Measure of the Relationship Between Two Documents},
  journal = {Journal of the American Society for Information Science},
  year    = {1973},
  volume  = {24},
  pages   = {265--269}
}

@Book{sparckjones:97,
  editor    = {Karen Sparck Jones and Peter Willett},
  title     = {Readings in information retrieval},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, US},
  year      = {1997}
}

@Proceedings{sperberg-mcqueen:02:tei,
  editor    = {C.M. Sperberg-McQueen and Lou Burnard},
  title     = {{TEI P4: Guidelines for Electronic Text Encoding and Interchange}},
  publisher = {Text Encoding Initiative Consortium},
  address   = {Oxford, Providence, Charlottesville, Bergen},
  year      = {2002},
  url       = {http://www.tei-c.org/},
  note      = {Available from \url{http://www.tei-c.org/}}
}

@Article{stallman:97:right,
  author  = {R. Stallman},
  title   = {The Right to Read},
  journal = {Communications of the ACM},
  volume  = 40,
  number  = 2,
  year    = 1997
}

@Book{sutton:98:rl,
  author    = {R. S. Sutton and A. G. Barto},
  title     = {Reinforcement Learning: An Introduction},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1998}
}

@Article{tnn:bengio:frasconi:96:input-output-hmms,
  author  = {Y. Bengio and P. Frasconi},
  title   = {Input-Output {HMM's} for Sequence Processing},
  journal = {IEEE Trans. on Neural Networks},
  type    = {Paper},
  volume  = {7},
  pages   = {1231--1249},
  year    = {1996}
}

@Book{vapnik82,
  author    = {V. N. Vapnik},
  title     = {Estimation of Dependences Based on Empirical Data},
  publisher = {Springer-Verlag},
  address   = {Berlin},
  year      = 1982
}

@Book{vapnik:98:slt,
  author = "V. N.
Vapnik",
  title = "Statistical Learning Theory",
  publisher = "Wiley",
  address = "New York",
  year = 1998
}

% The doi field holds the bare identifier; styles add the resolver prefix.
@InProceedings{voorhees:01:hirelevant,
  author = {Ellen M. Voorhees},
  title = {Evaluation by highly relevant documents},
  booktitle = {Proceedings of the 24th annual international ACM SIGIR conference on Research and development in information retrieval},
  publisher = {ACM Press},
  location = {New Orleans, Louisiana, United States},
  pages = {74--82},
  year = {2001},
  isbn = {1-58113-331-6},
  doi = {10.1145/383952.383963},
  abstract = { Given the size of the web, the search engine industry has argued that engines should be evaluated by their ability to retrieve highly relevant pages rather than all possible relevant pages. To explore the role highly relevant documents play in retrieval system evaluation, assessors for the \mbox{TREC-9} web track used a three-point relevance scale and also selected best pages for each topic. The relative effectiveness of runs evaluated by different relevant document sets differed, confirming the hypothesis that different retrieval techniques work better for retrieving highly relevant documents. Yet evaluating by highly relevant documents can be unstable since there are relatively few highly relevant documents. TREC assessors frequently disagreed in their selection of the best page, and subsequent evaluation by best page across different assessors varied widely. The discounted cumulative gain measure introduced by J\"{a}rvelin and Kek\"{a}l\"{a}inen increases evaluation stability by incorporating all relevance judgments while still giving precedence to highly relevant documents.
}
}

% The doi field holds the bare identifier; styles add the resolver prefix.
@InProceedings{walker:02:linksandpower,
  author = {Jill Walker},
  title = {Links and power: the political economy of linking on the {Web}},
  booktitle = {Proceedings of the thirteenth conference on Hypertext and hypermedia},
  publisher = {ACM Press},
  location = {College Park, Maryland, USA},
  pages = {72--73},
  year = {2002},
  isbn = {1-58113-477-0},
  doi = {10.1145/513338.513358}
}

% Classic BibTeX styles expect the edition as a capitalised ordinal word.
@Book{wall:96:perl,
  author = {L. Wall and T. Christiansen and R. L. Schwartz},
  title = {Programming Perl},
  publisher = {O'Reilly \& Associates},
  edition = {Second},
  year = 1996
}

@Book{wasserman:94:social,
  author = {S. Wasserman and K. Faust},
  title = {Social Network Analysis},
  publisher = {Cambridge University Press},
  address = {Cambridge},
  year = {1994}
}

@Article{weiss:99:maximizing,
  author = {S. M. Weiss and C. Apte and F. J. Damerau and D. E. Johnson and F. J. Oles and T. Goetz and T. Hampp},
  title = {Maximizing Text-Mining Performance},
  journal = {IEEE Intelligent Systems},
  volume = 14,
  number = 4,
  pages = {63--69},
  year = 1999
}

@InProceedings{wiener:95:neural,
  author = "Erik D. Wiener and Jan O. Pedersen and Andreas S. Weigend",
  title = "A neural network approach to topic spotting",
  booktitle = "Proceedings of SDAIR-95, 4th Annual Symposium on Document Analysis and Information Retrieval",
  address = "Las Vegas, US",
  pages = "317--332",
  year = "1995",
  url = "http://www.stern.nyu.edu/~aweigend/Research/Papers/TextCategorization/Wiener.Pedersen.Weigend_SDAIR95.ps",
  abstract = "This paper presents an application of nonlinear neural networks to topic spotting. Neural networks allow us to model higher-order interaction between document terms and to simultaneously predict multiple topics using shared hidden features. In the context of this model, we compare two approaches to dimensionality reduction in representation: one based on term selection and another based on Latent Semantic Indexing (LSI).
Two different methods are proposed for improving LSI representations for the topic spotting task. We find that term selection and our modified LSI representations lead to similar topic spotting performance, and that this performance is equal to or better than other published results on the same corpus."
}

@InProceedings{witten:96:dl,
  author = {I. H. Witten and C. Nevill-Manning and S. J. Cunningham},
  title = {Building a digital library for computer science research: technical issues},
  booktitle = {Proceedings Australasian Computer Science Conference},
  address = {Melbourne, Australia},
  year = 1996
}

% The edition belongs in its own field, not in the title.
@Book{witten:99:gigabytes,
  author = {I. H. Witten and A. Moffat and T. C. Bell},
  title = {Managing gigabytes: compressing and indexing documents and images},
  edition = {Second},
  publisher = {Morgan Kaufmann Publishers Inc.},
  year = {1999},
  isbn = {1-55860-570-3}
}

% NOTE(review): the key reads "worf" although the first author is Wolf; it is
% left unchanged because documents may already cite it.
@InProceedings{worf:02:optimalcrawling,
  author = {J. L. Wolf and M. S. Squillante and P. S. Yu and J. Sethuraman and L. Ozsen},
  title = {Optimal Crawling Strategies for {Web} Search Engines},
  booktitle = {Proceedings of the Eleventh International World Wide Web Conference},
  year = {2002}
}

@Article{yang:02:hypertextcategorization,
  author = {Y. Yang and S. Slattery and R. Ghani},
  title = {A study of approaches to hypertext categorization},
  journal = {Journal of Intelligent Information Systems},
  volume = 18,
  number = 2,
  pages = {219--241},
  year = {2002}
}

@InProceedings{yang:97:comparative,
  author = {Y. Yang and J. Pedersen},
  title = {A comparative study on feature selection in text categorization},
  booktitle = {Proceedings of the Fourteenth International Conference on Machine Learning},
  pages = {412--420},
  year = 1997
}

@Article{yang:99:evaluation,
  author = {Y.
Yang},
  title = {An evaluation of statistical approaches to text categorization},
  journal = {Information Retrieval},
  volume = 1,
  number = {1/2},
  pages = {69--90},
  year = {1999}
}

@InProceedings{yang:99:reexamination,
  author = "Yiming Yang and Xin Liu",
  title = "A re-examination of text categorization methods",
  booktitle = "Proceedings of SIGIR-99, 22nd ACM International Conference on Research and Development in Information Retrieval",
  editor = "Marti A. Hearst and Fredric Gey and Richard Tong",
  publisher = "ACM Press, New York, US",
  address = "Berkeley, US",
  pages = "42--49",
  year = "1999",
  url = "http://www.cs.cmu.edu/~yiming/papers.yy/sigir99.ps"
}

@InProceedings{zelikovitz:01:lsiforcategorization,
  author = {S. Zelikovitz and H. Hirsh},
  title = {Using {LSI} for Text Classification in the Presence of Background Text},
  booktitle = {Proceedings for the Conference on Information and Knowledge Management},
  year = 2001
}

@InProceedings{zhang:00:unlabeled,
  author = {T. Zhang and F. J. Oles},
  title = {A probability analysis on the value of unlabeled data for classification problems},
  booktitle = {Proceedings of the International Conference on Machine Learning},
  pages = {1191--1198},
  year = 2000
}

% Page ranges use an en dash (--); fields that were empty strings are omitted.
@InCollection{abello98,
  author = "J. Abello and A. Buchsbaum and J. Westbrook",
  title = "A functional approach to external graph algorithms",
  booktitle = "Proceedings of the 6th European Symposium on Algorithms",
  pages = "332--343",
  year = 1998
}

@Article{abumostafa89,
  author = "Y. S. Abu-Mostafa",
  title = "The {Vapnik-Chervonenkis} Dimension: Information versus Complexity in Learning",
  journal = "Neural Computation",
  volume = "1",
  number = "3",
  pages = "312--317",
  year = 1989
}

% \textit with a braced argument keeps the species name from being lower-cased
% by styles that recase titles.
@Book{achacoso92,
  author = "T. B. Achacoso and W. S. Yamamoto",
  title = "{AY's} Neuroanatomy of \textit{C. elegans} for Computation",
  publisher = "CRC Press",
  address = "Boca Raton, FL",
  year = 1992
}

@Article{ackley85,
  author = "D. H. Ackley and G. E. Hinton and T. J.
Sejnowski",
  title = "A learning algorithm for {Boltzmann} machines",
  journal = "Cognitive Science",
  volume = "9",
  pages = "147--169",
  year = 1985
}

% Page ranges use an en dash (--); fields that were empty strings are omitted.
@Book{aczel75,
  author = "J. Aczel and Z. Daroczy",
  title = "On measures of information and their characterizations",
  publisher = "Academic Press",
  address = "New York",
  year = 1975
}

@Article{adamic01,
  author = "L. A. Adamic and R. M. Lukose and A. R. Puniyani and B. A. Huberman",
  title = "Search in power-law networks",
  journal = "Phys. Rev. E",
  volume = "64",
  pages = "46135",
  year = 2001
}

@Book{aho86,
  author = "A. V. Aho and R. Sethi and J. D. Ullman",
  title = "Compilers. Principles, Techniques, and Tools",
  publisher = "Addison-Wesley",
  address = "Reading, MA",
  year = 1986
}

% Recorded as unpublished: the entry names no journal and its note says
% "Technical Report" without an institution.
@Unpublished{aiello02,
  author = "W. Aiello and F. Chung and L. Lu",
  title = "A random graph model for power law graphs",
  note = "Technical Report",
  year = 2002
}

@Article{aji00,
  author = "S. M. Aji and R. J. McEliece",
  title = "The generalized distributive law",
  journal = "IEEE Transactions on Information Theory",
  volume = "46",
  number = "2",
  pages = "325--343",
  year = 2000
}

@Unpublished{aji97,
  author = "S. M. Aji and R. J. McEliece",
  title = "The generalized distributive law",
  note = "Technical Report, Department of Electrical Engineering, California Institute of Technology",
  year = 1997
}

@Article{akaike74,
  author = "H. Akaike",
  title = "A new look at the statistical model identification",
  journal = "IEEE Transactions on Automatic Control",
  volume = "19",
  number = "6",
  pages = "716--723",
  year = 1974
}

@Article{albert00,
  author = {R. Albert and A. Barab{\'a}si},
  title = {Topology of evolving networks: local events and universality},
  journal = {Physical Review Letters},
  volume = {85},
  pages = {5234--5237},
  year = {2000}
}

@Article{albert00a,
  author = "R. Albert and H. Jeong and A.
Barab{\'a}si",
  title = "Error and attack tolerance of complex networks",
  journal = "Nature",
  volume = "406",
  pages = "378--382",
  year = 2000
}

% Page ranges use an en dash (--); fields that were empty strings are omitted.
@Article{albert99,
  author = {R. Albert and H. Jeong and A. Barab{\'a}si},
  title = {Diameter of the {World-Wide Web}},
  journal = {Nature},
  volume = {401},
  pages = {130},
  year = {1999}
}

@Article{amaral00,
  author = "L. A. N. Amaral and A. Scala and M. Barth{\'e}l{\'e}my and H. E. Stanley",
  title = "Classes of small-world networks",
  journal = "Proceedings of the National Academy of Sciences",
  volume = "97",
  number = "21",
  pages = "11149--11152",
  year = 2000
}

@Article{amari93,
  author = "S. Amari and N. Murata",
  title = "Statistical theory of learning curves under entropic loss criterion",
  journal = "Neural Computation",
  volume = "5",
  pages = "140--153",
  year = 1993
}

@Article{amari98,
  author = "S. Amari",
  title = "Natural gradient works efficiently in learning",
  journal = "Neural Computation",
  volume = "10",
  pages = "251--276",
  year = 1998
}

% Single record for this key; BibTeX reports an error on repeated keys.
@InProceedings{anderson01,
  author = "C. R. Anderson and P. Domingos and D. Weld",
  title = "Adaptive {Web} navigation for wireless devices",
  booktitle = "Proceedings of the 17th International Joint Conference on Artificial Intelligence",
  publisher = "Morgan Kaufmann",
  address = "San Francisco, CA",
  pages = "879--884",
  year = 2001
}

@InProceedings{anderson02,
  author = "C. R. Anderson and P. Domingos and D.
Weld",
  title = "Relational {Markov} Models and their Application to Adaptive {Web} Navigation",
  booktitle = "Proceedings of the Eighth International Conference on Knowledge Discovery and Data Mining",
  publisher = "ACM Press",
  pages = "143--152",
  year = "2002"
}

% Page ranges use an en dash (--); fields that were empty strings are omitted.
@InCollection{anderson95,
  author = "Charles H. Anderson",
  title = "Unifying perspectives on neuronal codes and processing",
  booktitle = "Proceedings of the XIX International Workshop on Condensed Matter Theories",
  address = "Caracas, Venezuela",
  year = 1995
}

% Single record for this key; BibTeX reports an error on repeated keys.
@Article{ansari02,
  author = "A. Ansari and C. Mela",
  title = "E-Customization",
  journal = "Journal of Marketing Research",
  year = 2002,
  note = "To appear"
}

@InProceedings{appelt95,
  author = "D. Appelt and J. Hobbs and J. Bear and D. Israel and M. Kameyama and A. Kehler and D. Martin and K. Meyers and M. Tyson",
  title = "{SRI International FASTUS} system: {MUC-6} test results and analysis",
  booktitle = "Proceedings of the Sixth Message Understanding Conference (MUC-6)",
  publisher = "Morgan Kaufmann",
  address = "San Francisco, CA",
  pages = "237--248",
  year = "1995",
  url = "citeseer.nj.nec.com/appelt95sri.html"
}

@Article{arnold89,
  author = "B. C. Arnold and S. J. Press",
  title = "Compatible conditional distributions",
  journal = "Journal of the American Statistical Association",
  volume = "84",
  pages = "152--156",
  year = 1989
}

% The edition is stored in its own field so the style can format it.
@Book{baldibrunak01,
  author = "P. Baldi and S. Brunak",
  title = "Bioinformatics: the machine learning approach",
  edition = "Second",
  publisher = "MIT Press",
  address = "Cambridge, MA",
  year = 2001
}

@InCollection{baldiinfo02,
  author = "P.
Baldi",
  title = "A computational theory of surprise",
  booktitle = "Information, Coding, and Mathematics",
  editor = "M. Blaum and P. G. Farrell and H. C. A. van Tilborg",
  publisher = "Kluwer Academic Publishers",
  address = "Boston",
  pages = "1--25",
  year = 2002
}

% Page ranges use an en dash (--); fields that were empty strings are omitted.
@Article{baik99,
  author = "J. Baik and P. Deift and K. Johansson",
  title = "On the distribution of the length of the longest increasing subsequence of random permutations",
  journal = "Journal of the American Mathematical Society",
  volume = "12",
  number = "4",
  pages = "1119--1178",
  year = 1999
}

@Article{ball92,
  author = "F. G. Ball and J. A. Rice",
  title = "Stochastic models for ion channels: introduction and bibliography",
  journal = "Mathematical Biosciences",
  volume = "112",
  number = "2",
  pages = "189--206",
  year = 1992
}

% The issue goes in the number field and the month uses the built-in macro.
@Article{baluja00,
  author = "Shumeet Baluja and Vibhu Mittal and Rahul Sukthankar",
  title = "Applying Machine Learning for High Performance Named-Entity Extraction",
  journal = "Computational Intelligence",
  volume = "16",
  number = "4",
  month = nov,
  year = "2000",
  url = "citeseer.nj.nec.com/article/baluja00applying.html"
}

@Article{barabasi99a,
  author = {A. Barab{\'a}si and R. Albert},
  title = {Emergence of scaling in random networks},
  journal = {Science},
  volume = {286},
  pages = {509--512},
  year = {1999}
}

@Article{barabasi99b,
  author = {A. Barab{\'a}si and R. Albert and H. Jeong},
  title = {Mean-field theory for scale-free random networks},
  journal = {Physica A},
  volume = {272},
  pages = {173--187},
  year = {1999}
}

@Article{barabasiparasitic01,
  author = "A. Barab{\'a}si and V. W. Freeh and H. Jeong and J. B. Brockman",
  title = "Parasitic Computing",
  journal = "Nature",
  volume = "412",
  pages = "894--897",
  year = 2001
}

@Book{barbour92,
  author = "A. D. Barbour and L. Holst and S.
Janson",
  title = "Poisson Approximation",
  publisher = "Oxford University Press",
  address = "Oxford",
  year = 1992
}

% Page ranges use an en dash (--); fields that were empty strings are omitted.
@Book{barnett82,
  author = "V. Barnett",
  title = "Comparative statistical inference",
  publisher = "John Wiley",
  address = "New York",
  year = 1982
}

@Article{barthelemy99,
  author = "M. Barth{\'e}l{\'e}my and L. A. N. Amaral",
  title = "Small-world networks: evidence for a crossover picture",
  journal = "Phys. Rev. Lett.",
  volume = "82",
  pages = "3180--3183",
  year = 1999
}

% bass69 and baum72 each have a single record; BibTeX reports an error on
% repeated keys.
@Article{bass69,
  author = "F. M. Bass",
  title = "A new product growth model for consumer durables",
  journal = "Management Science",
  volume = "15",
  pages = "215--227",
  year = 1969
}

@Article{baum72,
  author = "L. E. Baum",
  title = "An inequality and associated maximization technique in statistical estimation for probabilistic functions of {Markov} processes",
  journal = "Inequalities",
  volume = "3",
  pages = "1--8",
  year = 1972
}

@InCollection{belotti97,
  author = "V. Belotti and Y. Rogers",
  title = "From web press to web pressure: multimedia representations and multimedia publishing",
  booktitle = "Proceedings of Human Factors in Computing Systems: CHI '97",
  publisher = "ACM Press",
  pages = "279--286",
  year = 1997
}

% NOTE(review): the record below repeats the belotti97 key and continues on
% the next physical line; BibTeX keeps the first record and rejects this one.
@InCollection{belotti97,
  author = "V. Belotti and Y.
Rogers",
  title = "From web press to web pressure: multimedia representations and multimedia publishing",
  booktitle = "Proceedings of Human Factors in Computing Systems: CHI '97",
  publisher = "ACM Press",
  pages = "279--286",
  year = 1997
}

% benedetto01 and berger85 each have a single record; BibTeX reports an error
% on repeated keys. The compound surname is given in "Last, First" form so it
% is not split.
@InCollection{benedetto01,
  author = "Di Benedetto, M. G. and P. Baldi",
  title = "A model for self-organizing large-scale wireless networks",
  booktitle = "Proceedings of the International Workshop on 3G Infrastructure and Services, Athens, Greece, July 2-3",
  pages = "210--213",
  year = 2001
}

@InCollection{bengio95,
  author = "Y. Bengio and P. Frasconi",
  title = "An input-output {HMM} architecture",
  booktitle = "Advances in Neural Information Processing Systems",
  volume = "7",
  editor = "J. D. Cowan and G. Tesauro and J. Alspector",
  publisher = "Morgan Kaufmann",
  address = "San Francisco, CA",
  year = 1995,
  note = "(IEEE)"
}

@Book{berger80,
  author = "J. Berger",
  title = "Statistical Decision Theory, Foundations, Concepts, and Methods",
  publisher = "Springer Verlag",
  address = "New York",
  year = 1980
}

@Book{berger85,
  author = "J. O. Berger",
  title = "Statistical decision theory and {Bayesian} analysis",
  publisher = "Springer-Verlag",
  address = "New York",
  year = 1985
}

@Article{berry92,
  author = "M. W.
Berry",
  title = "Large scale singular value computations",
  journal = "Journal of Supercomputer Applications",
  volume = "6",
  pages = "13--49",
  year = "1992"
}

% Page ranges use an en dash (--); fields that were empty strings are omitted.
% berry92 and billsus98 each have a single record; BibTeX reports an error on
% repeated keys.
@Book{berry99,
  author = "Michael W. Berry and Murray Browne",
  title = "Understanding Search Engines: Mathematical Modeling and Text Retrieval",
  publisher = "Society for Industrial and Applied Mathematics",
  address = "Philadelphia, PA",
  year = "1999"
}

@Book{bertsekas95,
  author = "D. Bertsekas",
  title = "Dynamic Programming and Optimal Control",
  publisher = "Athena Scientific",
  address = "Belmont, MA",
  year = 1995
}

@Article{besag74,
  author = "J. Besag",
  title = "Spatial interaction and the statistical analysis of lattice systems",
  journal = "J. Royal Statis. Soc. B",
  volume = "36",
  pages = "192--225",
  year = 1974
}

@Article{besag95,
  author = "J. Besag and P. Green and D. Higdon and K. Mengersen",
  title = "{Bayesian} computation and stochastic systems",
  journal = "Statis. Sci.",
  volume = "10",
  pages = "3--66",
  year = 1995
}

@InCollection{billsus98,
  author = "D. Billsus and M. Pazzani",
  title = "Learning collaborative information filters",
  booktitle = "Proceedings of the International Conference on Machine Learning",
  publisher = "Morgan Kaufmann",
  address = "San Francisco, CA",
  pages = "46--54",
  year = 1998
}

@Book{bisho95,
  author = "C. Bishop",
  year = 1995,
  reviewnote = "Incomplete record: title and publisher are missing. Presumably Bishop's 1995 book on neural networks for pattern recognition; confirm before citing."
}

@InCollection{bishop99,
  author = "C. M.
Bishop",
  title = "{Bayesian} {PCA}",
  booktitle = "Advances in Neural Information Processing Systems",
  volume = "11",
  editor = "M. S. Kearns and S. A. Solla and D. A. Cohn",
  publisher = "The MIT Press",
  address = "Cambridge, MA",
  pages = "382--388",
  year = 1999
}

% Page ranges use an en dash (--); fields that were empty strings are omitted.
@Book{blahut87,
  author = "R. E. Blahut",
  title = "Principles and practice of information theory",
  publisher = "Addison-Wesley",
  address = "Reading, MA",
  year = 1987
}

@Article{blatt96,
  author = "M. Blatt and S. Wiseman and E. Domany",
  title = "Super-paramagnetic clustering of data",
  journal = "Physical Review Letters",
  volume = "76",
  pages = "3251--3254",
  year = 1996
}

% Single record for blei02a; BibTeX reports an error on repeated keys.
@InCollection{blei02a,
  author = "D. Blei and A. Y. Ng and M. I. Jordan",
  title = "Latent {Dirichlet} allocation",
  booktitle = "Advances in Neural Information Processing Systems 14",
  editor = "T. Dietterich and S. Becker and Z. Ghahramani",
  publisher = "Morgan Kaufmann",
  address = "San Francisco, CA",
  year = 2002
}

@InCollection{blei02b,
  author = "D. Blei and A. Y. Ng and M. I. Jordan",
  title = "Hierarchical {Bayesian} models for applications in information retrieval",
  booktitle = "Bayesian Statistics 7",
  editor = "J. M. Bernardo and M. Bayarri and J. O. Berger and A. P. Dawid and D. Heckerman and A. F. M. Smith and M. West",
  publisher = "Oxford University Press",
  year = 2002
}

% NOTE(review): the record below repeats the blei02b key and continues on the
% next physical line; BibTeX keeps the first record and rejects this one.
@InCollection{blei02b,
  author = "D. Blei and A. Y. Ng and M. I. Jordan",
  title = "Hierarchical {B}ayesian models for applications in information retrieval",
  booktitle = "Bayesian Statistics 7",
  editor = "J.
M Bernardo and M. Bayarri and J. O. Berger and A. P. Dawid and D. Heckerman and A. F. M. Smith and M. West",
  publisher = "Oxford University Press",
  year = 2002
}

% Page ranges use an en dash (--); fields that were empty strings are omitted.
@InProceedings{bikel97,
  author = "Daniel M. Bikel and Scott Miller and Richard Schwartz and Ralph Weischedel",
  title = "Nymble: a high-performance learning name-finder",
  booktitle = "Proceedings of ANLP-97",
  pages = {194--201},
  year = "1997",
  url = {citeseer.nj.nec.com/bikel97nymble.html}
}

@Article{bollacker99,
  author = "S. Lawrence and C. L. Giles and K. Bollacker",
  title = "Digital libraries and autonomous citation indexing",
  journal = "IEEE Computer",
  volume = "32",
  number = "6",
  pages = "67--71",
  year = 1999
}

@Article{bollobas01,
  author = "B. Bollob{\'a}s and O. Riordan and J. Spencer and G. Tusn{\'a}dy",
  title = "The degree sequence of a scale-free random graph process",
  journal = "Random Structures and Algorithms",
  volume = "18",
  number = "3",
  pages = "279--290",
  year = 2001
}

@Article{bollobas02,
  author = "B. Bollob{\'a}s and O. Riordan",
  title = "The diameter of a scale-free random graph",
  journal = "Combinatorica",
  year = 2002,
  note = "In press"
}

% The compound surname is braced and given in "Last, First" form so that
% "de la" is not parsed as a name particle.
@Article{bollobas82,
  author = "B. Bollob{\'a}s and {Fernandez de la Vega}, W.",
  title = "The diameter of random regular graphs",
  journal = "Combinatorica",
  volume = "2",
  pages = "125--134",
  year = 1982
}

% Single record for bollobas85; BibTeX reports an error on repeated keys.
@Book{bollobas85,
  author = "B. Bollob{\'a}s",
  title = "Random Graphs",
  publisher = "Academic Press",
  address = "London",
  year = 1985
}

@Book{box92,
  author = "G. E. P. Box and G. C. Tiao",
  title = "{Bayesian} inference in statistical analysis",
  publisher = "John Wiley and Sons",
  address = "New York",
  year = 1992,
  note = "(First Edition in 1973)"
}

@InProceedings{brand02,
  author = "M.E.
Brand",
  title = "Incremental Singular Value Decomposition of Uncertain Data with Missing Values",
  booktitle = "Proceedings of the European Conference on Computer Vision (ECCV): Lecture Notes in Computer Science",
  publisher = "Springer Verlag",
  pages = "707--720",
  year = "2002"
}

% brand02 and breese98 each have a single record; BibTeX reports an error on
% repeated keys.
@InCollection{breese98,
  author = "J. S. Breese and D. Heckerman and C. Kadie",
  title = "Empirical analysis of predictive algorithms for collaborative filtering",
  booktitle = "Proceedings of the Fourteenth Conference on Uncertainty in Artificial Intelligence",
  publisher = "Morgan Kaufmann",
  address = "San Francisco, CA",
  pages = "43--52",
  year = 1998
}

% NOTE(review): pages were recorded as 1-7, which is the issue range of the
% Computer Networks volume; the article pages are given here. Confirm against
% the published volume.
@InCollection{brin98,
  author = "S. Brin and L. Page",
  title = "The anatomy of a large-scale hypertextual ({Web}) search engine",
  booktitle = "Proceedings of the Seventh International World Wide Web Conference (WWW7)/Computer Networks",
  volume = "30",
  publisher = "Elsevier",
  address = "Amsterdam",
  pages = "107--117",
  year = 1998
}

@InCollection{broder00,
  author = "A. Broder and R. Kumar and F. Maghoul and P. Raghavan and S. Rajagopalan and R. Stata and A. Tomkins and J.
Wiener",
  title = "Graph structure in the {Web}",
  booktitle = "Proceedings of the Ninth International World Wide Web Conference (WWW9)/Computer Networks",
  volume = "33",
  publisher = "Elsevier",
  address = "Amsterdam",
  pages = "309--320",
  year = 2000
}

% NOTE(review): the broder00 pages above were recorded as 1-6, which is the
% issue range of the Computer Networks volume; confirm the article pages.
% Page ranges use an en dash (--); fields that were empty strings are omitted.
@Book{brown86,
  author = "L. D. Brown",
  title = "Fundamentals of Statistical Exponential Families",
  publisher = "Institute of Mathematical Statistics",
  address = "Hayward, CA",
  year = 1986
}

% The field name was misspelled "booltitle", so BibTeX ignored it.
@InCollection{bu02,
  author = "T. Bu and D. Towsley",
  title = "On distinguishing between {Internet} power law topology generators",
  booktitle = "Proceedings of IEEE INFOCOM",
  year = 2002
}

% bucklin02 and buntine92 each have a single record; BibTeX reports an error
% on repeated keys.
@Unpublished{bucklin02,
  author = "R. E. Bucklin and C. Sismeiro",
  title = "A model of web site browsing behavior estimated on clickstream data",
  year = 2002,
  note = "Submitted for publication"
}

@Article{buntine92,
  author = "W. Buntine",
  title = "Learning classification trees",
  journal = "Statistics and Computing",
  volume = "2",
  pages = "63--73",
  year = 1992
}

@Article{buntine96,
  author = "W. Buntine",
  title = "A guide to the literature on learning probabilistic networks from data",
  journal = "IEEE Trans.
on Knowledge and Data Eng.", year = 1996, volume = "8", pages = "195--210" }

% Verbatim second copies of byrne99 and cadez00 were dropped, page ranges
% use an en-dash (--), and empty-string fields were removed.

@Article{burges98,
  author = {C. J. C. Burges},
  title = {A tutorial on support vector machines for pattern recognition},
  journal = {Data Mining and Knowledge Discovery},
  volume = {2},
  pages = {121--167},
  year = {1998}
}

@InProceedings{byrne99,
  author = {M. D. Byrne and B. E. John and N. S. Wehrle and D. C. Crow},
  title = {The tangled web we wove: a taskonomy of {WWW} use},
  booktitle = {Proceedings of CHI'99: Human Factors in Computing Systems},
  publisher = {ACM Press},
  address = {New York},
  pages = {544--551},
  year = {1999}
}

@Article{cadez00,
  author = {I. V. Cadez and D. Heckerman and P. Smyth and C. Meek and S. White},
  title = {Model-Based Clustering and Visualization of Navigation Patterns on a {Web} Site},
  journal = {Data Mining and Knowledge Discovery},
  note = {in press},
  year = {2003}
}

@InProceedings{ califf98, author = "M. E. Califf and R. J. 
Mooney", title = "Relational Learning of Pattern-Match Rules for Information Extraction", booktitle = "Working Notes of {AAAI} Spring Symposium on Applying Machine Learning to Discourse Processing", publisher = "AAAI Press", address = "Menlo Park, CA", pages = "6--11", year = "1998", url = "citeseer.nj.nec.com/califf97relational.html" }

% Redundant verbatim copies of castelli95 and catledge95 were dropped; the
% single remaining catledge95 definition is the one that starts on the last
% line of this group. Page ranges use an en-dash (--).

@Article{callaway01,
  author = {D. S. Callaway and J. E. Hopcroft and J. M. Kleinberg and M. E. J. Newman and S. H. Strogatz},
  title = {Are randomly grown graphs really random?},
  journal = {Physical Review E},
  volume = {64},
  pages = {041902},
  year = {2001}
}

@Article{cardie97,
  author = {Claire Cardie},
  title = {Empirical Methods in Information Extraction},
  journal = {AI Magazine},
  volume = {18},
  number = {4},
  pages = {65--80},
  year = {1997},
  url = {citeseer.nj.nec.com/cardie97empirical.html}
}

@Article{carlson99,
  author = {J. M. Carlson and J. Doyle},
  title = {Highly optimized tolerance: a mechanism for power laws in designed systems},
  journal = {Physical Review E},
  volume = {60},
  number = {2},
  pages = {1412--1427},
  year = {1999}
}

@Article{castelli95,
  author = {V. Castelli and T. Cover},
  title = {On the exponential value of labeled samples},
  journal = {Pattern Recognition Letters},
  volume = {16},
  pages = {105--111},
  year = {1995}
}

@Article{ catledge95, author = "L. D. Catledge and J. 
Pitkow", title = "Characterizing browsing strategies in the {W}orld-{W}ide {W}eb", journal = "Computer Networks and ISDN Systems", year = 1995, volume = "27", number = "", pages = "1065--1073", note = "" }

% The redundant verbatim copy of chen96 was dropped; its single remaining
% definition starts on the last line of this group. Journal names are
% spelled out, page ranges use an en-dash (--), empty fields were removed.

@Book{chaitin87,
  author = {G. J. Chaitin},
  title = {Algorithmic Information Theory},
  publisher = {Cambridge University Press},
  address = {Cambridge, UK},
  year = {1987}
}

@Article{chakrabarti99,
  author = {S. Chakrabarti and B. Dom and D. Gibson and J. Kleinberg and S. R. Kumar and P. Raghavan and S. Rajagopalan and A. Tomkins},
  title = {Mining the link structure of the {World Wide Web}},
  journal = {IEEE Computer},
  year = {1999}
}

@Article{charniak91,
  author = {E. Charniak},
  title = {Bayesian networks without tears},
  journal = {AI Magazine},
  volume = {12},
  pages = {50--63},
  year = {1991}
}

@Article{cheeseman88,
  author = {P. Cheeseman},
  title = {An inquiry into computer understanding},
  journal = {Computational Intelligence},
  volume = {4},
  pages = {57--142},
  note = {With discussion},
  year = {1988}
}

@InProceedings{ chen96, author = "S. F. Chen and J. 
Goodman", title = "An Empirical Study of Smoothing Techniques for Language Modeling", booktitle = "Proceedings of the Thirty-Fourth Annual Meeting of the Association for Computational Linguistics", publisher = "Morgan Kaufmann", address = "San Francisco", editor = "Aravind Joshi and Martha Palmer", pages = "310--318", year = "1996" }

% Page ranges use an en-dash (--); empty-string fields were removed.
% chung02a is a technical report, so it is typed TechReport with the
% issuing institution instead of an Article with a pseudo-journal.

@InProceedings{chickering97,
  author = {Chickering, David Maxwell and Heckerman, David and Meek, Christopher},
  title = {A {Bayesian} Approach to Learning {Bayesian} Networks with Local Structure},
  booktitle = {Uncertainty in Artificial Intelligence: Proceedings of the Thirteenth Conference (UAI-1997)},
  publisher = {Morgan Kaufmann Publishers},
  address = {San Francisco, CA},
  pages = {80--89},
  year = {1997}
}

@Article{cho95,
  author = {Sung-Bae Cho and Jin H. Kim},
  title = {An {HMM/MLP} architecture for sequence recognition},
  journal = {Neural Computation},
  volume = {7},
  pages = {358--369},
  year = {1995}
}

@Article{chung01a,
  author = {F. Chung and Mark Garrett and R. Graham and D. Shallcross},
  title = {Distance realization problems with applications to {Internet} tomography},
  journal = {Journal of Computer and System Sciences},
  volume = {63},
  number = {3},
  pages = {432--448},
  year = {2001}
}

@Article{chung01b,
  author = {F. Chung and L. Lu},
  title = {The diameter of random sparse graphs},
  journal = {Advances in Applied Mathematics},
  volume = {26},
  pages = {257--279},
  year = {2001}
}

@Article{chung02,
  author = {F. Chung and R. Graham},
  title = {Sparse quasi-random graphs},
  journal = {Combinatorica},
  note = {In press},
  year = {2002}
}

@TechReport{chung02a,
  author = {F. Chung and L. Lu},
  title = {Connected components in random graphs with given expected degree sequences},
  institution = {University of California, San Diego},
  year = {2002}
}

@Article{ chung89, author = "F. R. K. Chung and R. L. Graham and R. M. 
Wilson", title = "Quasi-random graphs", journal = "Combinatorica", year = 1989, volume = "9", number = "", pages = "345--362" }

% Redundant copies of cockburn02 and cohen99 were dropped; the single
% remaining cohen99 definition starts on the last line of this group.
% clarke02: "page" is not a BibTeX field (now "pages"), and the month is
% built from the standard macros instead of the undefined token jan./feb.

@InProceedings{clarke00,
  author = {I. Clarke and O. Sandberg and B. Wiley and T. W. Hong},
  title = {Freenet: a distributed anonymous information storage and retrieval system},
  booktitle = {International Workshop on Design Issues in Anonymity and Unobservability},
  location = {Berkeley, CA, USA},
  pages = {311--320},
  year = {2000}
}

@Article{clarke02,
  author = {I. Clarke and S. G. Miller and T. W. Hong and O. Sandberg and B. Wiley},
  title = {Protecting free expression online with {Freenet}},
  journal = {IEEE Internet Computing},
  volume = {6},
  number = {1},
  pages = {40--49},
  month = jan # "/" # feb,
  year = {2002}
}

@Article{cockburn02,
  author = {A. Cockburn and B. McKenzie},
  title = {What do {Web} users do? {An} empirical analysis of {Web} use},
  journal = {International Journal of Human-Computer Studies},
  volume = {54},
  pages = {903--922},
  year = {2002}
}

@Article{ cohen99, author = "W. W. Cohen and R. E. Schapire and Y. 
Singer", title = "Learning to order things", journal = "Journal of Artificial Intelligence Research", year = 1999, volume = "10", number = "", pages = "243--270", note = "" }

% The verbatim second copy of cooley99 was dropped.
% jordan99 had neither author nor title, so it could not be cited on its
% own; it is now an edited Book. booktitle is retained so that the entry
% still works if it is used as a crossref parent (not verifiable here).
% cooper01 had an empty journal; it is typed Unpublished, which only
% requires author, title and note.

@Misc{cohennips02,
  author = {William Cohen and Andrew McCallum},
  title = {Information Extraction from the {World Wide Web}},
  address = {Vancouver, CA},
  note = {Tutorial presented at the 15th Neural Information Processing Conference (NIPS-15)},
  year = {2002}
}

@Article{cohenquass00,
  author = {William Cohen and Andrew McCallum and Dallan Quass},
  title = {Learning to understand the {Web}},
  journal = {IEEE Data Engineering Bulletin},
  volume = {23},
  pages = {17--24},
  year = {2000}
}

@Article{cooley99,
  author = {R. Cooley and B. Mobasher and J. Srivastava},
  title = {Data preparation for mining {World Wide Web} browsing patterns},
  journal = {Knowledge and Information Systems},
  volume = {1},
  pages = {5--32},
  year = {1999}
}

@Book{jordan99,
  editor = {M. I. Jordan},
  title = {Learning in Graphical Models},
  booktitle = {Learning in Graphical Models},
  publisher = {The MIT Press},
  address = {Cambridge, MA},
  year = {1999}
}

@Unpublished{cooper01,
  author = {C. Cooper and A. Frieze},
  title = {A general model of web graphs},
  note = {Technical Report},
  year = {2001}
}

@Article{cooper90,
  author = {G. F. Cooper},
  title = {The computational complexity of probabilistic inference using {Bayesian} belief networks},
  journal = {Artificial Intelligence},
  volume = {42},
  pages = {393--405},
  year = {1990}
}

@Book{ cormen01, author = "T. H. Cormen and C. E. Leiserson and R. L. Rivest and C. 
Stein", title = "Introduction to Algorithms", publisher = "MIT Press", note = "Second Edition", address = "Cambridge, MA", year = 2001, pages = "" }

% Redundant copies of cover91, cox64 (four) and daley99 were dropped; the
% single remaining daley99 definition starts on the last line of this group.
% cox64: volume 14 of the American Journal of Physics appeared in 1946; the
% citation key is kept unchanged so existing citations still resolve.
% cristianini00: a Book takes its name from "title", not "booktitle".

@InCollection{cottrell95,
  author = {Fu-Sheng Tsung and G. W. Cottrell},
  title = {Phase-Space Learning},
  booktitle = {Advances in Neural Information Processing Systems},
  volume = {7},
  editor = {J. D. Cowan and G. Tesauro and J. Alspector},
  publisher = {Morgan Kaufmann},
  address = {San Francisco, CA},
  note = {(IEEE)},
  year = {1995}
}

@Book{cover91,
  author = {T. M. Cover and J. A. Thomas},
  title = {Elements of Information Theory},
  publisher = {John Wiley},
  address = {New York},
  year = {1991}
}

@Article{cox64,
  author = {R. T. Cox},
  title = {Probability, frequency and reasonable expectation},
  journal = {American Journal of Physics},
  volume = {14},
  pages = {1--13},
  year = {1946}
}

@Book{cristianini00,
  author = {N. Cristianini and J. Shawe-Taylor},
  title = {An Introduction to Support Vector Machines},
  publisher = {Cambridge University Press},
  address = {Cambridge, UK},
  year = {2000}
}

@Book{ daley99, author = "D. J. Daley and J. 
Gani", title = "Epidemic Modeling: An Introduction", publisher = "Cambridge University Press", address = "Cambridge, UK", year = 1999 }

% Redundant copies of dechter99bucket and deshpande01 were dropped; the
% single remaining deshpande01 definition starts on the last line of this
% group. Journal names are spelled out; ranges use an en-dash (--).

@Article{dawid92,
  author = {A. P. Dawid},
  title = {Applications of a general propagation algorithm for probabilistic expert systems},
  journal = {Statistics and Computing},
  volume = {2},
  pages = {25--36},
  year = {1992}
}

@Article{dayan95,
  author = {P. Dayan and G. E. Hinton and R. M. Neal and R. S. Zemel},
  title = {The {Helmholtz} Machine},
  journal = {Neural Computation},
  volume = {7},
  number = {5},
  pages = {889--904},
  year = {1995}
}

@Article{dechter99bucket,
  author = {Rina Dechter},
  title = {Bucket Elimination: A Unifying Framework for Reasoning},
  journal = {Artificial Intelligence},
  volume = {113},
  number = {1--2},
  pages = {41--85},
  year = {1999},
  url = {citeseer.nj.nec.com/article/dechter99bucket.html}
}

% dempster77: the series letter moved from the volume ("B39") into the
% journal name, and the acronym is protected from case changes.
@Article{dempster77,
  author = {A. P. Dempster and N. M. Laird and D. B. Rubin},
  title = {Maximum likelihood from incomplete data via the {EM} algorithm},
  journal = {Journal of the Royal Statistical Society, Series B},
  volume = {39},
  pages = {1--22},
  year = {1977}
}

@InCollection{ deshpande01, author = "M. Deshpande and G. 
Karypis", title = "Selective {M}arkov models for predicting {W}eb-page accesses", booktitle = "Proceedings of the SIAM Conference on Data Mining", year = 2001, volume = "", publisher = "SIAM Press", address = "", number = "", editor = "", pages = "", note = "" }

% dhillon01efficient used to be defined three times: a Misc stub (which
% BibTeX picked, being first) and two identical InCollection copies. Only
% the structured InCollection form is kept. The redundant verbatim copy of
% dhillon:modha:concept was dropped as well; its single remaining
% definition starts on the last line of this group.

@InCollection{dhillon01efficient,
  author = {I. Dhillon and J. Fan and Y. Guan},
  title = {Efficient Clustering of Very Large Document Collections},
  editor = {R. Grossman and C. Kamath and R. Namburu},
  booktitle = {Data Mining for Scientific and Engineering Applications},
  publisher = {Kluwer Academic Publishers},
  year = {2001},
  url = {citeseer.nj.nec.com/dhillon01efficient.html}
}

@Article{ dhillon:modha:concept, author = { I. S. Dhillon and D. S. 
Modha }, title = { Concept decompositions for large sparse text data using clustering }, journal = { Machine Learning}, volume = { 42}, number = { 1}, pages = { 143--175}, year = "2001", url = {citeseer.nj.nec.com/article/dhillon01concept.html} }

% The first of the two "dirichlet" definitions was dropped; the remaining
% one (last line of this group) has its title typo fixed and its editors
% separated with "and", as BibTeX requires for name lists.

@Article{diaconis91,
  author = {P. Diaconis and D. Stroock},
  title = {Geometric bounds for eigenvalues of {Markov} chains},
  journal = {Annals of Applied Probability},
  volume = {1},
  pages = {36--61},
  year = {1991}
}

@Article{dijkstra59,
  author = {E. W. Dijkstra},
  title = {A note on two problems in connexion with graphs},
  journal = {Numerische Mathematik},
  volume = {1},
  pages = {269--271},
  year = {1959}
}

@InProceedings{dill01selfsimilarity,
  author = {Stephen Dill and S. Ravi Kumar and Kevin S. McCurley and Sridhar Rajagopalan and D. Sivakumar and Andrew Tomkins},
  title = {Self-similarity in the {Web}},
  booktitle = {The {VLDB} Journal},
  pages = {69--78},
  year = {2001},
  url = {citeseer.nj.nec.com/dill01selfsimilarity.html}
}

@InProceedings{ dirichlet, author = {M. P. Brown and R. Hughey and A. Krogh and I. S. Mian and K. Sj{\"o}lander and D. Haussler}, title = "Using {D}irichlet Mixture Priors to Derive Hidden {M}arkov Models for Protein Families", booktitle = "Proc. of First Int. Conf. on Intelligent Systems for Molecular Biology", address = "Wash. D.C.", publisher = "AAAI Press", editor = "L. Hunter and D. Searls and J. 
Shavlik", pages = "47--55", year = "1993", month = jul }

% dmoz is a web site with no author and no note, so it is typed Misc; the
% "key" field gives author-less entries something to sort and label by.
% Redundant copies of domingos01 and dumais00hierarchical were dropped; the
% single remaining dumais00hierarchical definition starts on the last line
% of this group.

@Misc{dmoz,
  key = {Open Directory Project},
  title = {The Open Directory Project (http://www.dmoz.org/)},
  url = {http://www.dmoz.org/}
}

@InProceedings{domingos01,
  author = {P. Domingos and M. Richardson},
  title = {Mining the network value of customers},
  booktitle = {Proceedings of the ACM Seventh International Conference on Knowledge Discovery and Data Mining},
  publisher = {ACM Press},
  address = {New York},
  pages = {57--66},
  year = {2001}
}

@Article{duane87,
  author = {S. Duane and A. D. Kennedy and B. J. Pendleton and D. Roweth},
  title = {Hybrid {Monte Carlo}},
  journal = {Physics Letters B},
  volume = {195},
  pages = {216--222},
  year = {1987}
}

@Book{duda73,
  author = {R. O. Duda and P. E. Hart},
  title = {Pattern Classification and Scene Analysis},
  publisher = {Wiley},
  address = {New York, NY},
  year = {1973}
}

@InProceedings{ dumais00hierarchical, author = "Susan T. 
Dumais and Hao Chen", title = "Hierarchical classification of {W}eb content", booktitle = "Proceedings of {SIGIR}-00, 23rd {ACM} International Conference on Research and Development in Information Retrieval", publisher = "ACM Press, New York, US", address = "Athens, GR", editor = "Nicholas J. Belkin and Peter Ingwersen and Mun-Kew Leong", pages = "256--263", year = "2000", url = "citeseer.nj.nec.com/dumais00hierarchical.html" }

% Two different books shared the key everitt84, which made the second one
% (Everitt and Hand, 1981) impossible to cite; it is now everitt81, and
% everitt84 still resolves to the same book as before.
% The redundant verbatim copy of fagin00 was dropped; its single remaining
% definition starts on the last line of this group.

@Book{durbin98,
  author = {R. Durbin and S. Eddy and A. Krogh and G. Mitchison},
  title = {Biological Sequence Analysis. Probabilistic Models of Proteins and Nucleic Acids},
  publisher = {Cambridge University Press},
  year = {1998}
}

@Article{erdos59,
  author = {P. Erd{\H{o}}s and A. R{\'e}nyi},
  title = {On random graphs},
  journal = {Publ. Math. Debrecen},
  volume = {6},
  pages = {290--291},
  year = {1959}
}

@Article{erdos60,
  author = {P. Erd{\H{o}}s and A. R{\'e}nyi},
  title = {On the evolution of random graphs},
  journal = {Publ. Math. Inst. Hung. Acad. Sci},
  volume = {5},
  pages = {17--61},
  year = {1960}
}

@Book{everitt84,
  author = {B. S. Everitt},
  title = {An Introduction to Latent Variable Models},
  publisher = {Chapman and Hall},
  address = {London and New York},
  year = {1984}
}

@Book{everitt81,
  author = {B. S. Everitt and D. J. Hand},
  title = {Finite Mixture Distributions},
  publisher = {Chapman and Hall},
  address = {London and New York},
  year = {1981}
}

@InCollection{ fagin00, author = "R. Fagin and A. Karlin and J. Kleinberg and P. Raghavan and S. Rajagopalan and R. Rubinfeld and M. Sudan and A. 
Tomkins", title = "Random walks with ``back buttons''", booktitle = "Proceedings of the ACM Symposium on Theory of Computing", year = 2000, volume = "", publisher = "ACM Press", address = "New York", number = "", editor = "", pages = "484--493" }

% The verbatim second copy of fermi49 was dropped. Edition statements moved
% from "note" to the dedicated "edition" field. flake02 carries full
% volume/number/pages data, so its "In press" note was removed.

@InProceedings{faloutsos99,
  author = {M. Faloutsos and P. Faloutsos and C. Faloutsos},
  title = {On power-law relationships of the {Internet} topology},
  booktitle = {Proceedings of the ACM SIGCOMM Conference, Cambridge, MA},
  year = {1999}
}

@Book{feller68,
  author = {W. Feller},
  title = {An Introduction to Probability Theory and its Applications},
  volume = {1},
  edition = {Third},
  publisher = {John Wiley \& Sons},
  address = {New York},
  year = {1968}
}

@Book{feller71,
  author = {W. Feller},
  title = {An Introduction to Probability Theory and its Applications},
  volume = {2},
  edition = {Second},
  publisher = {John Wiley \& Sons},
  address = {New York},
  year = {1971}
}

@Article{fermi49,
  author = {E. Fermi},
  title = {On the origin of the cosmic radiation},
  journal = {Physical Review},
  volume = {75},
  number = {8},
  pages = {1169--1174},
  year = {1949}
}

@Article{fill91,
  author = {J. A. Fill},
  title = {Eigenvalue bounds on convergence to stationarity for nonreversible {Markov} chains with an application to an exclusion process},
  journal = {Annals of Applied Probability},
  volume = {1},
  pages = {62--87},
  year = {1991}
}

@Article{flake02,
  author = {G. W. Flake and S. Lawrence and C. L. Giles and F. Coetzee},
  title = {Self-organization and identification of communities},
  journal = {IEEE Computer},
  volume = {35},
  number = {3},
  pages = {66--71},
  year = {2002}
}

@Article{ fraley02, author = "C. Fraley and A. E. 
Raftery", title = "Model-based clustering, discriminant analysis, and density estimation", journal = "Journal of the American Statistical Association", volume = "97", pages = "611--631", year = "2002" }

% The verbatim second copy of fraley02 (a repeat of the entry closed above)
% was dropped. Page ranges use an en-dash (--).

@InProceedings{freitag00information,
  author = {Dayne Freitag and Andrew McCallum},
  title = {Information Extraction with {HMM} Structures Learned by Stochastic Optimization},
  booktitle = {{AAAI}/{IAAI}},
  pages = {584--589},
  year = {2000},
  url = {citeseer.nj.nec.com/freitag00information.html}
}

@InProceedings{freitag98b,
  author = {Dayne Freitag},
  title = {Multistrategy learning for information extraction},
  booktitle = {Proceedings of the 15th International Conference on Machine Learning},
  publisher = {Morgan Kaufmann},
  address = {San Francisco, CA},
  pages = {161--169},
  year = {1998},
  url = {citeseer.nj.nec.com/freitag98multistrategy.html}
}

@InProceedings{freitag98information,
  author = {Dayne Freitag},
  title = {Information Extraction from {HTML}: Application of a General Machine Learning Approach},
  booktitle = {Proceedings of AAAI-98},
  publisher = {AAAI Press},
  address = {Menlo Park, CA},
  pages = {517--523},
  year = {1998},
  url = {citeseer.nj.nec.com/freitag98information.html}
}

@Book{frey98,
  author = {B. J. Frey},
  title = {Graphical Models for Machine Learning and Digital Communication},
  publisher = {MIT Press},
  year = {1998}
}

@Article{friedman97,
  author = {J. H. Friedman},
  title = {On bias, variance, 0/1 loss, and the curse of dimensionality},
  journal = {Data Mining and Knowledge Discovery},
  volume = {1},
  pages = {55--77},
  year = {1997}
}

@Article{ frigessi93, author = "A. Frigessi and P. Di Stefano and C. R. Hwang and S. J. 
Sheu", title = "Convergence rate of the {G}ibbs sampler, the {M}etropolis algorithm and other single-site updating dynamics", journal = "Journal of the Royal Statistical Society", year = 1993, volume = "55", pages = "205--219" }

% The verbatim second copy of galambos87 and two redundant copies of
% gelman95 were dropped. The most complete gelman95 (with address) is kept
% first so that it is the one BibTeX uses. NOTE(review): one more gelman95
% definition starts on the last line of this group and is still a repeat.

@Book{galambos87,
  author = {J. Galambos},
  title = {The Asymptotic Theory of Extreme Order Statistics},
  edition = {Second},
  publisher = {Robert E. Krieger Publishing Company},
  address = {Malabar, FL},
  year = {1987}
}

@InProceedings{gelbukh01,
  author = {A. Gelbukh and G. Sidorov},
  title = {Zipf and {Heaps} laws' coefficients depend on language},
  booktitle = {Proceedings of the 2001 Conference on Intelligent Text Processing and Computational Linguistics},
  publisher = {Springer Verlag},
  pages = {332--335},
  year = {2001}
}

@Article{gelman93,
  author = {A. Gelman and T. P. Speed},
  title = {Characterizing a joint probability distribution by conditionals},
  journal = {Journal of the Royal Statistical Society B},
  volume = {55},
  number = {1},
  pages = {185--188},
  year = {1993}
}

@Book{gelman95,
  author = {A. Gelman and J. B. Carlin and H. S. Stern and D. B. Rubin},
  title = {{Bayesian} Data Analysis},
  publisher = {Chapman and Hall},
  address = {London},
  year = {1995}
}

@Book{ gelman95, author = "A. Gelman and J. B. Carlin and H. S. Stern and D. B. 
Rubin", title = "Bayesian Data Analysis", publisher = "Chapman and Hall", address = "London", year = 1995 }

% gibbs97 used to be defined twice (TechReport and Unpublished); the
% TechReport form is kept, with the fuller author initials of the other
% copy and the words "Technical report" taken out of the institution name.
% Journal names are spelled out and page ranges use an en-dash (--).

@Article{geman84,
  author = {S. Geman and D. Geman},
  title = {Stochastic relaxation, {Gibbs} distributions and the {Bayesian} restoration of images},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {6},
  pages = {721--741},
  year = {1984}
}

@Article{geman92,
  author = {S. Geman and E. Bienenstock and R. Doursat},
  title = {Neural networks and the bias/variance dilemma},
  journal = {Neural Computation},
  volume = {4},
  number = {1},
  pages = {1--58},
  year = {1992}
}

@Article{geyer92,
  author = {C. J. Geyer},
  title = {Practical {Markov} Chain {Monte Carlo}},
  journal = {Statistical Science},
  volume = {7},
  pages = {473--511},
  year = {1992}
}

@Article{ghahramani97a,
  author = {Z. Ghahramani and M. I. Jordan},
  title = {Factorial hidden {Markov} models},
  journal = {Machine Learning},
  year = {1997}
}

@InCollection{ghahramani97b,
  author = {Z. Ghahramani},
  title = {Learning dynamic {Bayesian} networks},
  booktitle = {Adaptive Processing of Temporal Information. Lecture Notes in Artificial Intelligence},
  editor = {M. Gori and C. L. Giles},
  publisher = {Springer Verlag},
  address = {Heidelberg},
  note = {In press},
  year = {1998}
}

@TechReport{gibbs97,
  author = {M. N. Gibbs and D. J. C. MacKay},
  title = {Efficient implementation of {Gaussian} processes},
  institution = {Cavendish Laboratory},
  address = {Cambridge, UK},
  year = {1997}
}

@Article{gilbert59,
  author = {E. N. Gilbert},
  title = {Random graphs},
  journal = {Annals of Mathematical Statistics},
  volume = {30},
  pages = {1141--1144},
  year = {1959}
}

@Article{ gilbert97, author = "N. 
Gilbert", title = "A simulation of the structure of academic science", journal = "Sociological Research Online", year = 1997, volume = "2", number = "2", pages = "" }

% goller96 (conference paper) and goller97 (Ph.D. thesis) had their venue
% stored in "journal"; they now use the matching entry types.
% gnutella02: the month uses the standard unquoted macro.
% The first of the two gormbaldi98 definitions was dropped; the remaining
% one starts on the last line of this group. NOTE(review): its first author
% was listed as "A. Gorm", which looks like a given name mistaken for a
% surname; changed to "A. G. Pedersen" -- confirm against the paper.

@Article{gilks93,
  author = {W. R. Gilks and D. G. Clayton and D. J. Spiegelhalter and N. G. Best and A. J. McNeil and L. D. Sharples and A. J. Kirby},
  title = {Modelling complexity: Applications of {Gibbs} sampling in medicine},
  journal = {Journal of the Royal Statistical Society},
  volume = {55},
  pages = {39--52},
  year = {1993}
}

@Article{gilks94,
  author = {W. R. Gilks and A. Thomas and D. J. Spiegelhalter},
  title = {A language and program for complex {Bayesian} modelling},
  journal = {The Statistician},
  volume = {43},
  pages = {69--78},
  year = {1994}
}

@Article{gnutella02,
  author = {Matei Ripeanu and Ian Foster and Adriana Iamnitchi},
  title = {Mapping the {Gnutella} Network: Properties of Large-Scale Peer-to-Peer Systems and Implications for System Design},
  editor = {Li Gong},
  journal = {IEEE Internet Computing},
  publisher = {Springer-Verlag},
  volume = {6},
  number = {1},
  month = aug,
  year = {2002}
}

@InProceedings{goller96,
  author = {C. Goller and A. Kuchler},
  title = {Learning task-dependent distributed structure-representations by backpropagation through structure},
  booktitle = {IEEE International Conference on Neural Networks},
  pages = {347--352},
  year = {1996}
}

@PhdThesis{goller97,
  author = {C. Goller},
  title = {A connectionist approach for learning search-control heuristics for automated deduction systems},
  school = {Technical University of Munich, Computer Science},
  year = {1997}
}

@Article{ gormbaldi98, author = "A. G. Pedersen and P. Baldi and S. Brunak and Y. 
Chauvin", title = "{DNA} Structure in Human {RNA} Polymerase {II} Promoters", journal = "Journal of Molecular Biology", year = 1998, volume = "281", number = "", pages = "663--673" }

% The verbatim second copy of greenberg93 was dropped and its title typo
% ("Organization or") fixed. Page ranges use an en-dash (--), initials are
% spaced, and empty-string fields were removed.

@Article{gottlieb98,
  author = {J. P. Gottlieb and M. Kusunoki and M. E. Goldberg},
  title = {The representation of visual salience in monkey parietal cortex},
  journal = {Nature},
  volume = {391},
  pages = {481--484},
  year = {1998}
}

@Book{gradshteyn80,
  author = {I. S. Gradshteyn and I. M. Ryzhik},
  title = {Table of integrals, series, and products},
  publisher = {Academic Press},
  address = {New York},
  year = {1980}
}

@Article{granger89,
  author = {C. Granger},
  title = {Combining forecasts---twenty years later},
  journal = {Journal of Forecasting},
  volume = {8},
  pages = {167--173},
  year = {1989}
}

@Book{greenberg93,
  author = {S. Greenberg},
  title = {The Computer User as Toolsmith: the Use, Reuse, and Organization of Computer-Based Tools},
  publisher = {Cambridge University Press},
  year = {1993}
}

@Article{gregory92,
  author = {P. C. Gregory and T. J. Loredo},
  title = {A new method for the detection of a periodic signal of unknown shape and period},
  journal = {Astrophysical Journal},
  volume = {398},
  pages = {146--168},
  year = {1992}
}

@InCollection{gull88,
  author = {S. F. Gull},
  title = {{Bayesian} inductive inference and maximum entropy},
  booktitle = {Maximum entropy and {Bayesian} methods in science and engineering},
  editor = {G. J. Erickson and C. R. Smith},
  publisher = {Kluwer},
  address = {Dordrecht},
  pages = {53--74},
  year = {1988}
}

@InCollection{ gull89, author = "S.F. Gull", title = "Developments in maximum entropy data analysis", booktitle = "Maximum entropy and Bayesian methods", editor = "J. 
Skilling", publisher = "Kluwer", address = "Dordrecht", pages = "53--71", year = 1989 }

% Two further copies of gull89 were removed here; BibTeX rejects repeated keys.

% Never-published preprint: no journal, so it is an Unpublished entry.
@Unpublished{ hampsonbaldi01,
  author = "S. Hampson and D. Kibler and P. Baldi",
  title = "Distribution patterns of locally over-represented $k$-mers in non-coding yeast {DNA}",
  note = "Submitted.",
  year = 2001
}

@Article{ hampsonbaldi02,
  author = "S. Hampson and D. Kibler and P. Baldi",
  title = "Distribution patterns of over-represented $k$-mers in non-coding yeast {DNA}",
  journal = "Bioinformatics",
  year = 2002,
  volume = "18",
  number = "4",
  pages = "611--626"
}

@InCollection{ hampsonismb00,
  author = "S. Hampson and P. Baldi and D. Kibler and S. Sandmeyer",
  title = "Analysis of yeast's {ORF}s upstream regions by parallel processing, microarrays, and computational methods",
  booktitle = "Proceedings of the 2000 Conference on Intelligent Systems for Molecular Biology (ISMB00), La Jolla, CA",
  publisher = "AAAI Press",
  pages = "190--201",
  address = "Menlo Park, CA",
  year = 2000
}

@Book{ han01, author = "J. Han and M.
Kamber", title = "Data Mining: Concepts and Techniques", publisher = "Morgan Kaufmann", address = "San Francisco, CA", year = 2001 }

@Book{ hand01,
  author = "D. J. Hand and H. Mannila and P. Smyth",
  title = "Principles of Data Mining",
  publisher = "MIT Press",
  address = "Cambridge, MA",
  year = 2001
}

@Article{ hansen90,
  author = "L. Hansen and P. Salamon",
  title = "Neural network ensembles",
  journal = "IEEE Transactions on Pattern Analysis and Machine Intelligence",
  year = 1990,
  volume = "12",
  number = "10",
  pages = "993--1001"
}

@Book{ hastie01,
  author = "T. Hastie and R. Tibshirani and J. Friedman",
  title = "The Elements of Statistical Learning: Data Mining, Inference, and Prediction",
  publisher = "Springer Verlag",
  address = "New York",
  year = 2001
}

@Article{ hastings70,
  author = "W. K. Hastings",
  title = "Monte {C}arlo sampling methods using {M}arkov chains and their applications",
  journal = "Biometrika",
  year = 1970,
  volume = "57",
  pages = "97--109"
}

@Article{ heckerman00, author = "D. Heckerman and D. M. Chickering and C. Meek and R. Rounthwaite and C.
Kadie", title = "Dependency networks for inference, collaborative filtering, and data visualization", journal = "Journal of Machine Learning Research", year = 2000, volume = "1", pages = "49--75" }

@Article{ heckerman95,
  author = "D. Heckerman and D. Geiger and D. M. Chickering",
  title = "Learning {B}ayesian networks: the combination of knowledge and statistical data",
  journal = "Machine Learning",
  year = 1995,
  volume = "20",
  pages = "197--243"
}

% This technical report previously reused the key heckerman95, so BibTeX
% rejected it as a repeated entry and it could never be cited. It now has
% its own key and the entry type that matches a technical report.
@TechReport{ heckerman95tutorial,
  author = "D. Heckerman",
  title = "A Tutorial on Learning {B}ayesian Networks",
  institution = "Microsoft Corporation",
  address = "Redmond, WA",
  number = "MSR-TR-95-06",
  year = 1995
}

@Article{ heckerman97,
  author = "D. Heckerman",
  title = "{B}ayesian networks for data mining",
  journal = "Data Mining and Knowledge Discovery",
  year = 1997,
  volume = "1",
  pages = "79--119"
}

@InCollection{ heckerman98,
  author = {D. Heckerman},
  title = {A tutorial on learning with {Bayesian} networks},
  booktitle = {Learning in Graphical Models},
  publisher = {Kluwer},
  year = {1998},
  editor = {M. I. Jordan},
  address = {Dordrecht}
}

@Book{ hertz91,
  author = "J. Hertz and A. Krogh and R. G. Palmer",
  title = "Introduction to the Theory of Neural Computation",
  publisher = "Addison Wesley",
  address = "Redwood City, CA",
  year = 1991
}

@InCollection{ hinton94,
  author = "G. E. Hinton and R. S. Zemel",
  title = "Autoencoders, minimum description length and {H}elmholtz free energy",
  booktitle = "Advances in Neural Information Processing Systems 6",
  editor = "J. D. Cowan and G. Tesauro and J. Alspector",
  publisher = "Morgan Kaufmann",
  address = "San Francisco, CA",
  pages = "3--10",
  year = 1994
}

@Article{ hinton95, author = "G. E. Hinton and P. Dayan and B. J. Frey and R. M.
Neal", title = "The wake-sleep algorithm for unsupervised neural networks", journal = "Science", year = 1995, volume = "268", number = "5214", pages = "1158--1161" }

% Single hofmann99 entry; the author's surname is Hofmann, as in the key.
@InCollection{ hofmann99,
  author = "T. Hofmann",
  title = "Probabilistic latent semantic indexing",
  booktitle = "Proceedings of the 22nd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval",
  publisher = "ACM Press",
  address = "New York",
  pages = "50--57",
  year = 1999
}

@Article{ hornik90,
  author = "K. Hornik and M. Stinchcombe and H. White",
  title = "Universal approximation of an unknown function and its derivatives using multilayer feedforward networks",
  journal = "Neural Networks",
  year = 1990,
  volume = "3",
  pages = "551--560"
}

@Article{ hornik94,
  author = "K. Hornik and M. Stinchcombe and H. White and P. Auer",
  title = "Degree of approximation results for feedforward networks approximating unknown mappings and their derivatives",
  journal = "Neural Computation",
  year = 1994,
  volume = "6",
  pages = "1262--1275"
}

% Proceedings paper with no publisher on record: InProceedings does not
% require one, InCollection does.
@InProceedings{ huber67,
  author = "P. J. Huber",
  title = "The behavior of maximum likelihood estimates under nonstandard conditions",
  booktitle = "Proceedings Fifth Berkeley Symposium on Mathematical Statistics and Probability",
  volume = "1",
  pages = "221--233",
  year = 1967
}

@Book{ huberman01,
  author = "B. A. Huberman",
  title = "The Laws of the Web",
  publisher = "MIT Press",
  address = "Cambridge, MA",
  year = 2001
}

@Article{ huberman98, author = "B. A. Huberman and P. L. T. Pirolli and J. E. Pitkow and R. M.
Lukose", title = "Strong regularities in {W}orld {W}ide {W}eb surfing", journal = "Science", year = 1998, volume = "280", pages = "95--97" }

@Article{ huberman99,
  author = "B. A. Huberman and L. A. Adamic",
  title = "Growth dynamics of the {W}orld {W}ide {W}eb",
  journal = "Nature",
  volume = "401",
  pages = "131",
  year = 1999
}

@Article{ hunter74,
  author = "J. Hunter and R. Shotland",
  title = "Treating data collected by the small world method as a {M}arkov process",
  journal = "Social Forces",
  year = 1974,
  volume = "52",
  pages = "321"
}

@Unpublished{ ijcai99,
  author = {P. Baldi and S. Brunak and P. Frasconi and G. Pollastri and G. Soda},
  title = {Bidirectional Dynamics for Protein Secondary Structure Prediction},
  note = {IJCAI-99 workshop on neural, symbolic, and reinforcement methods for sequence learning (unpublished).},
  year = {1999}
}

@Article{ isham81,
  author = "V. Isham",
  title = "An introduction to spatial point processes and {M}arkov random fields",
  journal = "Internat. Statist. Rev.",
  year = 1981,
  volume = "49",
  pages = "21--43"
}

@Article{ itti01,
  author = "L. Itti and C. Koch",
  title = "Computational modelling of visual attention",
  journal = "Nature Reviews Neuroscience",
  year = 2001,
  volume = "2",
  pages = "194--203"
}

@InCollection{ jaakkola97, author = "T. S. Jaakkola and M. I. Jordan", title = "Recursive algorithms for approximating probabilities in graphical models", booktitle = "Advances in Neural Information Processing Systems", editor = "M. C. Mozer and M. I. Jordan and T.
Petsche", publisher = "MIT Press", address = "Cambridge, MA", volume = "9", pages = "487--493", year = 1997 }

@InCollection{ jaakkolaismb99,
  author = "T. S. Jaakkola and M. Diekhans and D. Haussler",
  title = "Using the {F}isher kernel method to detect remote protein homologies",
  booktitle = "Proceedings of the Seventh International Conference on Intelligent Systems for Molecular Biology (ISMB99)",
  editor = "T. Lengauer and R. Schneider and P. Bork and D. Brutlag and J. Glasgow and H. W. Mewes and R. Zimmer",
  pages = "149--155",
  publisher = "AAAI Press",
  address = "Menlo Park, CA",
  year = 1999
}

@InCollection{ jaakkolanips99,
  author = "T. S. Jaakkola and D. Haussler",
  title = "Exploiting generative models in discriminative classifiers",
  booktitle = "Advances in Neural Information Processing Systems",
  volume = "11",
  pages = "487--493",
  editor = "M. S. Kearns and S. A. Solla and D. A. Cohn",
  publisher = "The MIT Press",
  address = "Cambridge, MA",
  year = 1999
}

@Article{ jacobs91,
  author = "R. A. Jacobs and M. I. Jordan and S. J. Nowlan and G. E. Hinton",
  title = "Adaptive mixtures of local experts",
  journal = "Neural Computation",
  year = 1991,
  volume = "3",
  pages = "79--87"
}

@Article{ janiszewski98,
  author = "C. Janiszewski",
  title = "The influence of display characteristics on visual exploratory behavior",
  journal = "Journal of Consumer Research",
  year = 1998,
  volume = "25",
  pages = "290--301"
}

@Article{ jansen98, author = "B. J. Jansen and A. Spink and J. Bateman and T.
Saracevic", title = "Real-life information retrieval: a study of user queries on the {W}eb", journal = "SIGIR Forum", year = 1998, volume = "32", pages = "5--17" }

@Book{ jaynes03,
  author = "E. T. Jaynes",
  title = "Probability Theory: The Logic of Science",
  year = 2003,
  publisher = "Cambridge University Press",
  address = "Cambridge, UK",
  note = "In press"
}

@Article{ jaynes57a,
  author = "E. T. Jaynes",
  title = "Information theory and statistical mechanics",
  journal = "Physical Review",
  year = 1957,
  volume = "106",
  number = "4",
  pages = "620--630"
}

@Article{ jaynes57b,
  author = "E. T. Jaynes",
  title = "Information theory and statistical mechanics. {II}",
  journal = "Physical Review",
  year = 1957,
  volume = "108",
  number = "2",
  pages = "171--190"
}

@Article{ jaynes68,
  author = "E. T. Jaynes",
  title = "Prior probabilities",
  journal = "IEEE Trans. Systems Sci. Cybernet.",
  year = 1968,
  volume = "4",
  pages = "227--241"
}

@InCollection{ jaynes86, author = "E. T. Jaynes", title = "Bayesian methods: General background", booktitle = "Maximum Entropy and Bayesian Methods in Statistics", editor = "J. H.
Justice", publisher = "Cambridge University Press", address = "Cambridge", pages = "1--25", year = 1986 }

@Unpublished{ jaynes95,
  author = "E. T. Jaynes",
  title = "Probability Theory: The Logic of Science",
  year = 1994,
  note = "Unpublished."
}

@Article{ jeffreys92,
  author = "W. H. Jeffreys and J. O. Berger",
  title = "Ockham's razor and {B}ayesian analysis",
  journal = "Am. Sci.",
  year = 1992,
  volume = "80",
  pages = "64--72"
}

@Article{ jensen90,
  author = "F. V. Jensen and S. L. Lauritzen and K. G. Olesen",
  title = "Bayesian updating in causal probabilistic networks by local computations",
  journal = "Comput. Statist. Quart.",
  year = 1990,
  volume = "4",
  pages = "269--282"
}

@Book{ jensen96,
  author = "F. V. Jensen",
  title = "An Introduction to {B}ayesian Networks",
  year = 1996,
  publisher = "Springer Verlag",
  address = "New York"
}

% Volume and pages are known, so the stale "in press" note is dropped.
@Article{ jeong00,
  author = {H. Jeong and B. Tombor and R. Albert and Z. N. Oltvai and A.-L. Barab{\'a}si},
  title = {The large-scale organization of metabolic networks},
  journal = {Nature},
  volume = {407},
  pages = {651--654},
  year = {2000}
}

@InCollection{ jordan97,
  author = "M. I. Jordan and Z. Ghahramani and L. K. Saul",
  title = "Hidden {M}arkov decision trees",
  booktitle = "Advances in Neural Information Processing Systems",
  editor = "M. C. Mozer and M. I. Jordan and T. Petsche",
  publisher = "MIT Press",
  address = "Cambridge, MA",
  volume = "9",
  pages = "501--507",
  year = 1997
}

% Edited volume: the editor field replaces an author field that carried
% an "(Editor)" marker or an "Edited Volume" note. Single jordan99 entry.
@Book{ jordan99,
  editor = {M. I. Jordan},
  title = {Learning in Graphical Models},
  publisher = {MIT Press},
  year = {1999}
}

@Book{ jumarie90,
  author = "G. Jumarie",
  title = "Relative information",
  year = 1990,
  publisher = "Springer Verlag",
  address = "New York"
}

@Article{ kaelbling96,
  author = "L. P. Kaelbling and M. L. Littman and A. W. Moore",
  title = "Reinforcement learning: a survey",
  journal = "Journal of Artificial Intelligence Research",
  year = 1996,
  volume = "4",
  pages = "237--285"
}

@Book{ karlin68,
  author = "S. Karlin",
  title = "Total Positivity",
  publisher = "Stanford University Press",
  address = "Stanford, CA",
  year = 1968
}

@Article{ karlin81a,
  author = "S. Karlin and Y. Rinott",
  title = "Entropy inequalities for classes of probability distributions. {I}. {T}he univariate case",
  journal = "Adv. Appl. Prob.",
  year = 1981,
  volume = "13",
  pages = "93--112"
}

@Article{ karlin81b,
  author = "S. Karlin and Y. Rinott",
  title = "Entropy inequalities for classes of probability distributions. {II}. {T}he multivariate case",
  journal = "Adv. Appl. Prob.",
  year = 1981,
  volume = "13",
  pages = "325--351"
}

% Proceedings paper with no publisher on record: InProceedings does not
% require one, InCollection does.
@InProceedings{ kask01,
  author = {K. Kask and R. Dechter},
  title = {Branch and bound with mini-bucket heuristics},
  booktitle = {Proceedings International Joint Conference on Artificial Intelligence (IJCAI99)},
  pages = {426--433},
  year = {1999}
}

% A Book entry needs title; this one had the title stored in booktitle.
@Book{ kearns94,
  title = "An introduction to computational learning theory",
  author = "M. J. Kearns and U. V. Vazirani",
  year = 1994,
  publisher = "MIT Press",
  address = "Cambridge, MA"
}

@InCollection{ keeler91, author = "J. D. Keeler and D. E. Rumelhart and Wee-Kheng Leow", title = "Integrated Segmentation and Recognition of Hand-Printed Numerals", booktitle = "Advances in Neural Information Processing Systems", volume = "3", editor = "R. Lippmann and J. Moody and D.
Touretzky", publisher = "Morgan Kaufmann", address = "San Mateo, CA", pages = "557--563", year = 1991 }

@Article{ killworth78,
  author = "P. Killworth and H. Bernard",
  title = "Reverse small world experiment",
  journal = "Social Networks",
  year = 1978,
  volume = "1",
  pages = "159"
}

@Article{ kirkpatrick83,
  author = "S. Kirkpatrick and C. D. Gelatt and M. P. Vecchi",
  title = "Optimization by simulated annealing",
  journal = "Science",
  year = 1983,
  volume = "220",
  pages = "671--680"
}

% Proceedings paper with no publisher on record: InProceedings does not
% require one, InCollection does.
@InProceedings{ kleinberg00,
  author = "J. Kleinberg",
  title = "The small-world phenomenon: an algorithmic perspective",
  booktitle = "Proceedings of the 32nd ACM Symposium on the Theory of Computing",
  year = 2000
}

@Article{ kleinberg00a,
  author = "J. Kleinberg",
  title = "Navigation in a small world",
  journal = "Nature",
  year = 2000,
  volume = "406",
  pages = "845"
}

% The 14 is the volume of the NIPS series, as in the other NIPS entries.
@InCollection{ kleinberg01,
  author = "J. Kleinberg",
  title = "Small-world phenomena and the dynamic of information",
  booktitle = "Advances in Neural Information Processing Systems (NIPS)",
  publisher = "MIT Press",
  volume = "14",
  address = "Cambridge, MA",
  year = 2001
}

@Article{ kleinberg01a,
  author = "J. Kleinberg and S. Lawrence",
  title = "The structure of the {W}eb",
  journal = "Science",
  year = 2001,
  volume = "294",
  pages = "1849--1850"
}

@InProceedings{ kleinberg99z,
  author = "J. M. Kleinberg and R. Kumar and P. Raghavan and S. Rajagopalan and A. Tomkins",
  title = "The {W}eb as a graph: measurements, models, and methods",
  booktitle = "Proceedings of the International Conference on Combinatorics and Computing",
  year = 1999
}

@Article{ korte78, author = "C. Korte and S. Milgram", title = "Acquaintance networks between racial groups: {A}pplication of the small world method", journal = "J.
Personality and Social Psych.", year = 1978, volume = "15", pages = "101" }

@Article{ krogh94,
  author = "A. Krogh and M. Brown and I. S. Mian and K. Sj{\"o}lander and D. Haussler",
  title = "Hidden {Markov} models in computational biology: {A}pplications to protein modeling",
  journal = "J. Mol. Biol.",
  year = "1994",
  volume = "235",
  pages = "1501--1531"
}

@InCollection{ krogh95a,
  author = "A. Krogh and J. Vedelsby",
  title = "Neural network ensembles, cross validation and active learning",
  booktitle = "Advances in Neural Information Processing Systems",
  volume = "7",
  editor = "G. Tesauro and D. S. Touretzky and T. K. Leen",
  publisher = "MIT Press",
  address = "Cambridge, MA",
  pages = "231--238",
  year = 1995
}

@Book{ kullback68,
  author = "S. Kullback",
  title = "Information theory and statistics",
  year = 1968,
  note = "(First Edition in 1959)",
  publisher = "Dover",
  address = "New York"
}

% Proceedings papers with no publisher on record: InProceedings does not
% require one, InCollection does.
@InProceedings{ kumar00,
  author = "S. R. Kumar and P. Raghavan and S. Rajagopalan and D. Sivakumar and A. Tomkins and E. Upfal",
  title = "Stochastic models for the {W}eb graph",
  booktitle = "Proceedings of the 41st Annual Symposium on the Foundations of Computer Science",
  year = 2000
}

@InProceedings{ kumar99,
  author = "S. R. Kumar and P. Raghavan and S. Rajagopalan and A. Tomkins",
  title = "Extracting large-scale knowledge bases from the web",
  booktitle = "Proceedings of the 25th VLDB Conference",
  year = 1999
}

@InCollection{ kumar99b, author = "S. R. Kumar and P. Raghavan and S. Rajagopalan and A.
Tomkins", title = "Trawling the {W}eb for emerging cyber communities", booktitle = "Proceedings of the 8th World Wide Web Conference", year = 1999 }

@InProceedings{ kushmerick97wrapper,
  author = "Nicholas Kushmerick and Daniel S. Weld and Robert B. Doorenbos",
  title = "Wrapper Induction for Information Extraction",
  booktitle = "International Joint Conference on Artificial Intelligence ({IJCAI})",
  pages = "729--737",
  year = "1997",
  url = "citeseer.nj.nec.com/kushmerick97wrapper.html"
}

% The publisher's city lives in address, not inside the publisher name.
@InProceedings{ lafferty01conditional,
  author = "John Lafferty and Andrew McCallum and Fernando Pereira",
  title = "Conditional Random Fields: {P}robabilistic Models for Segmenting and Labeling Sequence Data",
  booktitle = "Proc. 18th International Conf. on Machine Learning",
  publisher = "Morgan Kaufmann",
  address = "San Francisco, CA",
  pages = "282--289",
  year = "2001",
  url = "citeseer.nj.nec.com/article/lafferty01conditional.html"
}

@Article{ lanczos50,
  author = "C. Lanczos",
  year = "1950",
  title = "An Iteration Method for the Solution of the Eigenvalue Problem of Linear Differential and Integral Operators",
  journal = "Journal of Research of the National Bureau of Standards",
  volume = "45",
  pages = "255--282"
}

@Article{ lapedes86,
  author = "A. Lapedes and R. Farber",
  title = "A self-optimizing, nonsymmetrical neural net for content addressable memory and pattern recognition",
  journal = "Physica",
  year = 1986,
  volume = "22D",
  pages = "247--259"
}

@InCollection{ lau99, author = "T. Lau and E.
Horvitz", title = "Patterns of search: analyzing and modeling {W}eb query refinement", booktitle = "Proceedings of the Seventh International Conference on User Modeling", publisher = "Springer-Verlag", pages = "119--128", year = 1999 }

@Article{ lauritzen88,
  author = "S. L. Lauritzen and D. J. Spiegelhalter",
  title = "Local computations with probabilities on graphical structures and their application to expert systems",
  journal = "J. Royal Statis. Soc. B",
  year = 1988,
  volume = "50",
  pages = "157--224"
}

@Article{ lauritzen90,
  author = "S. L. Lauritzen and A. P. Dawid and B. N. Larsen and H. G. Leimer",
  title = "Independence properties of directed {M}arkov fields",
  journal = "Networks",
  year = 1990,
  volume = "20",
  pages = "491--505"
}

% Single lauritzen96 entry; the file carried three copies, one of them
% with the year given as 1998.
@Book{ lauritzen96,
  author = "S. L. Lauritzen",
  title = "Graphical Models",
  year = 1996,
  publisher = "Oxford University Press",
  address = "Oxford, UK"
}

@Article{ lawrence99,
  author = {S. Lawrence and C. L. Giles},
  title = {Accessibility of information on the web},
  journal = {Nature},
  volume = {400},
  pages = {107--109},
  year = {1999}
}

@InCollection{ lecun90, author = "Y. Le Cun and B. Boser and J. Denker and D. Henderson and R. Howard and W. Hubbard and L.
Jackel", title = "Handwritten digit recognition with a back-propagation network", booktitle = "Advances in Neural Information Processing Systems", editor = "D. Touretzky", publisher = "Morgan Kaufmann", address = "San Mateo, CA", pages = "396--404", year = 1990 }

% Master's thesis: the dedicated entry type replaces Misc plus howpublished.
@MastersThesis{ leek97information,
  author = "T. R. Leek",
  title = "Information extraction using hidden {M}arkov models",
  school = "UC San Diego",
  year = "1997",
  url = "citeseer.nj.nec.com/leek97information.html"
}

@Article{ letsche97,
  author = "Todd A. Letsche and Michael W. Berry",
  title = "Large-Scale Information Retrieval with Latent Semantic Indexing",
  journal = "Information Sciences",
  volume = "100",
  number = "1--4",
  pages = "105--137",
  year = "1997",
  url = "citeseer.nj.nec.com/letsche97largescale.html"
}

@Article{ lewicki94,
  author = "M. S. Lewicki",
  title = "Bayesian Modeling and Classification of Neural Signals",
  journal = "Neural Computation",
  year = 1994,
  volume = "6",
  pages = "1005--1030"
}

@Unpublished{ li02, author = "S. Li and A. Montgomery and K. Srinivasan and J. L.
Liechty", title = "Predicting online purchase conversion using {W}eb path analysis", year = 2002, note = "Graduate School of Industrial Administration, Carnegie Mellon University, Pittsburgh, PA, preprint" }

@Article{ li92,
  author = "W. Li",
  title = "Random texts exhibit {Z}ipf's-law-like word frequency distribution",
  journal = "IEEE Transactions on Information Theory",
  year = 1992,
  volume = "38",
  number = "6",
  pages = "1842--1845"
}

@Article{ linsker89,
  author = "R. Linsker",
  title = "How to generate ordered maps by maximizing the mutual information between input and output signals",
  journal = "Neural Computation",
  year = 1989,
  volume = "1",
  pages = "402--411"
}

@Book{ little87,
  author = "R. J. A. Little and D. B. Rubin",
  title = "Statistical Analysis with Missing Data",
  publisher = "John Wiley",
  year = 1987
}

@Article{ longbaldibio, author = "A. D. Long and H. J. Mangalam and B. Y. Chan and L. Tolleri and G. W. Hatfield and P.
Baldi", title = "Global gene expression profiling in {{\it Escherichia coli}} {K}12: {I}mproved statistical inference from {DNA} microarray data using analysis of variance and a {B}ayesian statistical framework", journal = "Journal of Biological Chemistry", year = 2001, volume = "276", number = "23", pages = "19937--19944" }

% The species name above is double-braced: a single brace group that
% starts with a control sequence is still case-folded by BibTeX styles.

@Article{ mackay92,
  author = "D. J. C. MacKay",
  title = "Bayesian interpolation",
  journal = "Neural Computation",
  year = 1992,
  volume = "4",
  number = "3",
  pages = "415--447"
}

@Article{ mackay95,
  author = "D. J. C. MacKay and L. Peto",
  title = "A Hierarchical {D}irichlet Language Model",
  journal = "Natural Language Engineering",
  volume = "1",
  number = "3",
  pages = "1--19",
  year = "1995"
}

% NOTE(review): same work as mackay95 under a second key; kept because
% documents may cite either key.
@Article{ mackay95p,
  author = "D. J. C. MacKay and L. C. Bauman Peto",
  title = "A hierarchical {D}irichlet language model",
  journal = "Nat. Lang. Eng.",
  year = 1995,
  volume = "1",
  pages = "1--19"
}

@Unpublished{ mackay97a,
  author = "M. N. Gibbs and D. J. C. MacKay",
  title = "Efficient implementation of {G}aussian processes",
  note = "Draft manuscript",
  year = 1997
}

@Unpublished{ mackay97b,
  author = "M. N. Gibbs and D. J. C. MacKay",
  title = "Variational {G}aussian process classifiers",
  note = "Draft manuscript",
  year = 1997
}

@Article{ mackay99,
  author = "D. J. C. MacKay",
  title = "Comparison of approximate methods for handling hyperparameters",
  journal = "Neural Computation",
  year = 1999,
  volume = "11",
  pages = "1035--1068"
}

@Article{ mackaynn92, author = "D. J. C.
MacKay", title = "A practical {B}ayesian framework for back-propagation networks", journal = "Neural Computation", year = 1992, volume = "4", number = "3", pages = "448--472" }

% Single mackaynn94 entry carrying the fuller booktitle and the annotation.
@InProceedings{ mackaynn94,
  author = "D. J. C. MacKay",
  title = "Density Networks and their Application to Protein Modelling",
  booktitle = "Maximum Entropy and {B}ayesian Methods, {C}ambridge 1994",
  editor = "J. Skilling and S. Sibisi",
  publisher = "Kluwer",
  address = "Dordrecht",
  year = "1996",
  pages = "259--268",
  annote = "Date submitted: ; Date accepted: ; Collaborating institutes: MRC Laboratory of Molecular Biology, Cambridge"
}

% Single mackaynn95 entry carrying the full journal name and issue number.
@Article{ mackaynn95,
  author = "D. J. C. MacKay",
  title = "Bayesian Neural Networks and Density Networks",
  journal = "Nuclear Instruments and Methods in Physics Research, Section A",
  volume = 354,
  number = 1,
  year = 1995,
  pages = "73--80",
  annote = "Date submitted: 1994; Date accepted: 1994; Collaborating institutes: none"
}

@Article{ mackayt97,
  author = "D. J. C. MacKay and R. J. McEliece and J. F. Cheng",
  title = "Turbo decoding as an instance of {P}earl's belief propagation algorithm",
  journal = "IEEE J. Sel. Areas Commun.",
  year = 1997,
  note = "In press"
}

@Article{ mahmoud95, author = "H. M. Mahmoud and R. T.
Smythe",
  title = "A survey of recursive trees",
  journal = "Theory Probability Math Statist",
  year = 1995,
  volume = "51",
  number = "",
  pages = "1-27"
}

@Book{ mandelbrot77,
  author = "B. Mandelbrot",
  title = "Fractals: Form, Chance, and Dimension",
  publisher = "Freeman",
  address = "New York",
  year = 1977,
  volume = "",
  note = "",
  pages = ""
}

@Article{ maslov02,
  author = "S. Maslov and K. Sneppen",
  title = "Specificity and Stability in Topology of Protein Networks",
  journal = "Science",
  year = 2002,
  volume = "296",
  number = "5569",
  pages = "910-913"
}

@InProceedings{ mateescu-sosonkina-thompson,
  author = "Mateescu, G. and Sosonkina, M. and Thompson, P.",
  title = "A new model for probabilistic information retrieval on the web",
  booktitle = "Workshop on Web Analytics",
  editor = "Ghosh, J. and Srivastava, J.",
  publisher = "Second SIAM International Conference on Data Mining",
  pages = "17-27",
  year = "2002",
  address = "Arlington, VA"
}

@InProceedings{ mcallum00maximum,
  author = "Andrew Mc{C}allum and Dayne Freitag and Fernando Pereira",
  title = "Maximum Entropy {M}arkov Models for Information Extraction and Segmentation",
  booktitle = "Proc. 17th International Conf. on Machine Learning",
  publisher = "Morgan Kaufmann, San Francisco, CA",
  pages = "591--598",
  year = "2000",
  url = "citeseer.nj.nec.com/mccallum00maximum.html"
}

@Article{ mccallum00automating,
  author = "Andrew K. McCallum and Kamal Nigam and Jason Rennie and Kristie Seymore",
  title = "Automating the Construction of Internet Portals with Machine Learning",
  journal = "Information Retrieval",
  volume = "3",
  number = "2",
  publisher = "Kluwer Academic Publishers",
  pages = "127--163",
  year = "2000",
  url = "citeseer.nj.nec.com/196313.html"
}

@InProceedings{ mccallum00efficient,
  author = "Andrew McCallum and Kamal Nigam and Lyle H.
Ungar",
  title = "Efficient clustering of high-dimensional data sets with application to reference matching",
  booktitle = "Proceedings of the Sixth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining",
  pages = "169--178",
  year = "2000",
  publisher = "ACM Press",
  address = "New York",
  url = "citeseer.nj.nec.com/article/mccallum00efficient.html"
}

% mccallum00efficient: a byte-identical repeated record was removed.

@Article{ mccann98,
  author = "K. McCann and A. Hastings and G. R. Huxel",
  title = "Weak trophic interactions and the balance of nature",
  journal = "Nature",
  year = 1998,
  volume = "395",
  number = "",
  pages = "794--798"
}

@Book{ mccullagh89,
  author = "P. McCullagh and J. A. Nelder",
  title = "Generalized Linear Models",
  year = 1989,
  publisher = "Chapman and Hall",
  address = "London, UK"
}

@InCollection{ mceliece02,
  author = {R. J. McEliece and M. Yildirim},
  title = {Belief propagation on partially ordered sets},
  booktitle = {Mathematical Systems Theory in Biology, Communications, and Finance},
  pages = {},
  publisher = {IMA},
  year = {2002},
  editor = {D. Gilliam and J. Rosenthal},
  address = {University of Minnesota}
}

@Book{ mceliece77,
  author = "R. J. McEliece",
  title = "The Theory of Information and Coding",
  year = 1977,
  publisher = "Addison-Wesley Publishing Company",
  address = "Reading, MA"
}

@Book{ mclachlan00,
  author = "G. McLachlan and D. Peel",
  title = "Finite Mixture Models",
  publisher = "Wiley",
  year = 2000,
  volume = "",
  note = "",
  pages = ""
}

% NOTE(review): the record below repeats the key mclachlan00 and has no title
% field; it continues past this point, so it is left in place here.
@Book{ mclachlan00,
  booktitle = "Finite Mixture Models",
  author = "G. McLachlan and D.
Peel",
  year = 2000,
  publisher = "John Wiley and Sons",
  address = "New York, NY",
  volume = "",
  number = "",
  pages = ""
}

@Article{ mclysaght02,
  author = "A. McLysaght and B. Gaut and S. Hampson and P. Baldi",
  title = "LineUp: Statistical Detection of Chromosomal Homology with Applications to Plant Comparative Genomics",
  journal = "",
  year = 2002,
  volume = "",
  number = "",
  note = "submitted",
  pages = ""
}

% mena99: a byte-identical repeated record was removed.
@Book{ mena99,
  author = "J. Mena",
  title = "Data Mining your Website",
  publisher = "Digital Press",
  address = "Boston",
  year = 1999
}

@InCollection{ meng92,
  author = "X. L. Meng and D. B. Rubin",
  title = "Recent extensions to the {EM} algorithm",
  booktitle = "Bayesian statistics",
  editor = "J. M. Bernardo and J. O. Berger and A. P. Dawid and A. F. M. Smith",
  publisher = "Oxford University Press",
  address = "Oxford",
  volume = "4",
  pages = "307--320",
  year = 1992
}

@Article{ metropolis53,
  author = "N. Metropolis and A. W. Rosenbluth and M. N. Rosenbluth and A. H. Teller and E. Teller",
  title = "Equations of state calculations by fast computing machines",
  journal = "J. Chem. Phys.",
  year = 1953,
  volume = "21",
  pages = "1087--1092"
}

@Article{ micheli01,
  author = "A. Micheli and A. Sperduti and A. Starita and A. M. Bianucci",
  title = "Analysis of the internal representations developed by neural networks for structures applied to quantitative structure-activity relationship studies of benzodiazepines",
  journal = "J. Chem. Inf. Comput. Sci.",
  year = 2001,
  volume = "41",
  number = "",
  pages = "202--218"
}

@Article{ milgram67,
  author = "S. Milgram",
  title = "The small world problem",
  journal = "Psychology Today",
  year = 1967,
  volume = "1",
  number = "",
  pages = "61"
}

@Article{ milo02,
  author = "R. Milo and S. Shen-Orr and S. Itzkovitz and N. Kashtan and D. Chklovskii and U.
Alon",
  title = "Network motifs: simple building blocks of complex networks",
  journal = "Science",
  year = 2002,
  volume = "298",
  number = "",
  pages = "824--827"
}

% mitzenmacher01: the field name was misspelled "booltitle", so the entry had
% no booktitle at all.
@InCollection{ mitzenmacher01,
  author = "M. Mitzenmacher",
  title = "A brief history of generative models for power law and log normal distributions",
  booktitle = "Proceedings of the 39th Annual Allerton Conference on Communication, Control, and Computing",
  volume = "",
  number = "",
  pages = "182--191",
  year = 2001
}

@Article{ mitzenmacher02,
  author = "M. Mitzenmacher",
  title = "A brief history of generative models for power law and lognormal distributions",
  journal = "",
  volume = "",
  number = "",
  pages = "",
  note = "Technical Report",
  year = 2002
}

@Article{ modestino92,
  author = "J. M. Modestino and J. Zhang",
  title = "A {M}arkov random field model-based approach to image interpretation",
  journal = "IEEE Trans. Pattern Anal. Machine Intell.",
  year = 1992,
  volume = "14",
  pages = "606--615"
}

@Book{ moll88,
  author = "R. N. Moll and M. A. Arbib and A. J. Kfoury",
  title = "An Introduction to Formal Language Theory",
  publisher = "Springer-Verlag",
  address = "New York",
  year = 1988,
  volume = "",
  note = "",
  pages = ""
}

% montgomery01 and mooney00: byte-identical repeated records were removed.
@Article{ montgomery01,
  author = "A. L. Montgomery",
  title = "Applying quantitative marketing techniques to the Internet",
  journal = "Interfaces",
  year = 2001,
  volume = "30",
  number = "",
  pages = "90--108",
  note = ""
}

@InCollection{ mooney00,
  author = "R. J.
Mooney and L. Roy",
  title = "Content-based book recommending using learning for text categorization",
  booktitle = "Proceedings of the Fifth ACM Conference on Digital Libraries",
  volume = "",
  publisher = "ACM Press",
  address = "New York",
  number = "",
  pages = "195--204",
  year = 2000
}

@Article{ muller96,
  author = "K. R. Muller and M. Finke and N. Murata and K. Schulten and S. Amari",
  title = "A numerical study on learning curves in stochastic multilayer feedforward networks",
  journal = "Neural Computation",
  year = 1996,
  volume = "8",
  pages = "1085--1106"
}

@InCollection{ murata93,
  author = "N. Murata and S. Yoshizawa and S. Amari",
  title = "Learning curves, model selection and complexity of neural networks",
  booktitle = "Advances in Neural Information Processing Systems 5",
  editor = "S. J. Hanson and J. D. Cowan and C. Lee Giles",
  publisher = "Morgan Kaufmann",
  address = "San Mateo, CA",
  pages = "607--614",
  year = 1993
}

% nardis02: a byte-identical repeated record was removed.
@InCollection{ nardis02,
  author = "L. De Nardis and M. G. Di Benedetto and P. Baldi",
  title = "Ad-hoc networking in {UWB} systems",
  booktitle = "International Workshop on 3G Infrastructure and Services, Athens, Greece, July 2-3",
  publisher = "",
  pages = "",
  note = "Submitted",
  address = "",
  year = 2002
}

@InCollection{ nardis02a,
  author = "L. De Nardis and P. Baldi and M. G. Di Benedetto",
  title = "{UWB} Ad-Hoc Networks",
  booktitle = "Proceedings of the 2002 IEEE Conference on Ultra Wideband Systems and Technologies (UWBST2002)",
  publisher = "",
  pages = "",
  note = "",
  address = "Baltimore, MD",
  year = 2002
}

@Article{ neal92,
  author = "Radford M.
Neal",
  title = "Connectionist learning of belief networks",
  journal = "Artificial Intelligence",
  year = 1992,
  volume = "56",
  number = "",
  pages = "71--113"
}

% This is a different paper that reused the key neal92, so BibTeX discarded it.
% It now has its own key; neal92 still resolves to the entry above.
@Article{ neal92asymmetric,
  author = "Radford M. Neal",
  title = "Asymmetric parallel {B}oltzmann machines are belief networks",
  journal = "Neural Computation",
  year = 1992,
  volume = "4",
  pages = "832--834"
}

@Unpublished{ neal93a,
  author = "R. M. Neal",
  title = "Probabilistic inference using {M}arkov chain {M}onte {C}arlo methods",
  note = "Technical report. Department of Computer Science, University of Toronto",
  year = 1993
}

@Unpublished{ neal94,
  author = "R. M. Neal",
  title = "Priors for Infinite Networks",
  note = "Technical Report CRG-TR-94-1, Department of Computer Science, University of Toronto",
  year = 1994
}

@PhDThesis{ neal95,
  author = "R. M. Neal",
  title = "Bayesian learning for neural networks",
  school = "Department of Computer Science, University of Toronto",
  year = 1995
}

% neal96 and neal97 each appeared twice; the first record (the one BibTeX was
% already using) is kept and the repeated one removed.
@Book{ neal96,
  author = "R. M. Neal",
  title = "Bayesian learning for neural networks",
  year = 1996,
  publisher = "Springer-Verlag",
  address = "New York"
}

@Unpublished{ neal97,
  author = "R. M. Neal",
  title = "Monte {C}arlo implementation of {G}aussian process models for {B}ayesian regression and classification",
  note = "Technical Report no. 9702. Department of Statistics, University of Toronto",
  year = 1997,
  volume = "",
  pages = ""
}

@Article{ newman00,
  author = "M. E. J. Newman and C. Moore and D. J. Watts",
  title = "Mean-field solution of the small-world network model",
  journal = "Phys.
Rev. Lett.",
  year = 2000,
  volume = "84",
  number = "",
  pages = "3201--3204"
}

% norumelhart86: a Book entry needs "title" and "editor"; the original used
% "booktitle" and "editors", which BibTeX ignores.
@Book{ norumelhart86,
  title = "Parallel Distributed Processing",
  editor = "J. L. McClelland and D. E. Rumelhart and {the PDP Research Group}",
  year = 1986,
  publisher = "MIT Press",
  address = "Cambridge, MA"
}

% nothdurft00: page range corrected from "41183-1201" (stray leading digit).
@Article{ nothdurft00,
  author = "H. Nothdurft",
  title = "Salience from feature contrast: additivity across dimensions",
  journal = "Vision Research",
  year = 2000,
  volume = "40",
  number = "",
  pages = "1183--1201"
}

@Article{ olshausen93,
  author = "B. A. Olshausen and C. H. Anderson and D. C. Van Essen",
  title = "A neurobiological model of visual attention and invariant pattern recognition based on dynamic routing of information",
  journal = "The Journal of Neuroscience",
  year = 1993,
  volume = "13",
  number = "11",
  pages = "4700--4719"
}

@Article{ oltvai02,
  author = "Z. N. Oltvai and A. Barab{\'a}si",
  title = "Life's complexity pyramid",
  journal = "Science",
  year = 2002,
  volume = "298",
  number = "",
  pages = "763--764"
}

@Article{ paine92,
  author = "R. T. Paine",
  title = "Food-web analysis through field measurements of per capita interaction strength",
  journal = "Nature",
  year = 1992,
  volume = "355",
  number = "",
  pages = "73--75"
}

@InCollection{ pandurangan02,
  author = "G. Pandurangan and P. Raghavan and E. Upfal",
  title = "Using {P}age{R}ank to characterize web structure",
  booktitle = "Proceedings 8th Annual International Computing and Combinatorics Conference (COCOON)",
  year = 2002,
  volume = "",
  number = "",
  pages = ""
}

% pearl88: two records shared this key; merged, keeping the full title.
@Book{ pearl88,
  author = "J. Pearl",
  title = "Probabilistic Reasoning in Intelligent Systems: Networks of Plausible Inference",
  year = 1988,
  publisher = "Morgan Kaufmann",
  address = "San Mateo, CA"
}

@Article{ pennock02,
  author = "D. M. Pennock and G. W. Flake and S. Lawrence and E. J. Glover and C.
Lee Giles",
  title = "Winners don't take all: {C}haracterizing the competition for links on the web",
  journal = "Proceedings of the National Academy of Sciences",
  year = 2002,
  volume = "99",
  number = "",
  pages = "5207--5211"
}

@Article{ perline96,
  author = "R. Perline",
  title = "Zipf's law, the central limit theorem, and the random division of the unit interval",
  journal = "Physical Review E",
  year = 1996,
  volume = "54",
  number = "1",
  pages = "220--223"
}

@InCollection{ perrone94,
  author = "M. P. Perrone and L. N. Cooper",
  title = "When networks disagree: ensemble method for neural networks",
  booktitle = "Neural Networks for Speech and Image Processing",
  editor = "R. J. Mammone",
  publisher = "Chapman and Hall",
  address = "London",
  note = "Chapter 10",
  year = 1994
}

@Book{ phadke88,
  author = "A. G. Phadke and J. S. Thorp",
  title = "Computer Relaying for Power Systems",
  publisher = "Wiley",
  address = "New York",
  year = 1988
}

@Article{ philips90,
  author = "T. K. Philips and D. F. Towsley and J. K. Wolf",
  title = "On the diameter of a class of random graphs",
  journal = "IEEE Transactions on Information Theory",
  year = 1990,
  volume = "36",
  number = "2",
  pages = "285--288"
}

@Article{ pimm91,
  author = "S. L. Pimm and J. H. Lawton and J. E. Cohen",
  title = "Food web patterns and their consequences",
  journal = "Nature",
  year = 1991,
  volume = "350",
  number = "",
  pages = "669--674"
}

% pitkow98: a byte-identical repeated record was removed.
@Article{ pitkow98,
  author = "J. E. Pitkow",
  title = "Summary of {WWW} characterizations",
  journal = "Computer Networks and ISDN Systems",
  year = 1998,
  volume = "30",
  number = "",
  pages = "551--558",
  note = ""
}

@Article{ pittel94,
  author = "B.
Pittel",
  title = "Note on the heights of random recursive trees and random $m$-ary search trees",
  journal = "Random Structures and Algorithms",
  volume = "5",
  number = "",
  pages = "337--347",
  note = "",
  year = 1994
}

% popescul01: a byte-identical repeated record was removed.
@InCollection{ popescul01,
  author = "A. Popescul and L. H. Ungar and D. M. Pennock and S. Lawrence",
  title = "Probabilistic models for unified collaborative and content-based recommendation in sparse-data environments",
  booktitle = "Proceedings of the 17th International Conference on Uncertainty in Artificial Intelligence",
  volume = "",
  publisher = "Morgan Kaufmann",
  address = "San Francisco, CA",
  number = "",
  editor = "",
  pages = "437--444",
  year = 2001
}

@Book{ pratt95,
  author = "J. W. Pratt and H. Raiffa and R. Schlaifer",
  title = "Introduction to statistical decision theory",
  year = 1995,
  publisher = "MIT Press",
  address = "Cambridge, MA"
}

@Book{ press88,
  author = "W. H. Press and B. P. Flannery and S. A. Teukolsky and W. T. Vetterling",
  title = "Numerical Recipes in C",
  publisher = "Cambridge University Press",
  address = "Cambridge, UK",
  year = 1988,
  volume = "",
  note = "",
  pages = ""
}

@Book{ press89,
  author = "S. J. Press",
  title = "Bayesian statistics: {P}rinciples, models, and applications",
  year = 1989,
  publisher = "John Wiley",
  address = "New York"
}

@InCollection{ puzicha99,
  author = "T. Hofmann and J.
Puzicha",
  title = "Latent class models for collaborative filtering",
  booktitle = "Proceedings of the 1999 International Joint Conference on Artificial Intelligence",
  volume = "",
  publisher = "",
  address = "",
  number = "",
  pages = "688--693",
  year = 1999
}

% puzicha99, resnick94 and roberts99 each appeared twice with byte-identical
% content; one copy of each was removed.

@Article{ redner98,
  author = "S. Redner",
  title = "How popular is your paper? {A}n empirical study of the citation distribution",
  journal = "Euro. Phys. J. B",
  year = 1998,
  volume = "4",
  number = "",
  pages = "131--134"
}

@InCollection{ resnick94,
  author = "P. Resnick and N. Iacovou and M. Suchak and P. Bergstrom and J. Riedl",
  title = "Group{L}ens: an open architecture for collaborative filtering of netnews",
  booktitle = "Proceedings of the Ninth ACM Conference on Computer-Supported Cooperative Work",
  volume = "",
  publisher = "ACM Press",
  address = "New York",
  number = "",
  editor = "",
  pages = "175--186",
  year = 1994
}

@TechReport{ roberts99,
  author = "M. J. Roberts and S. M.
Mahesh",
  title = "Hotmail",
  journal = "",
  year = 1999,
  volume = "",
  number = "",
  pages = "",
  note = "HBS Case 899-185, Harvard Business School Publishing",
  institution = "Harvard University, Cambridge, MA"
}

% ross02 and rostbaldi01: byte-identical repeated records were removed.
% ross02 also had the publisher misspelled as "Adademic Press".
@Book{ ross02,
  author = "Sheldon M. Ross",
  title = "Probability Models for Computer Science",
  publisher = "Academic Press",
  address = "San Diego, CA",
  year = 2002
}

@Article{ rostbaldi01,
  author = "B. Rost and P. Baldi",
  title = "New improvements in protein secondary structure prediction",
  journal = "Briefings in Bioinformatics",
  year = 2001,
  volume = "",
  number = "",
  pages = "",
  note = "In press"
}

% roweis98: "PCS" in the title corrected to "PCA".
@InCollection{ roweis98,
  author = "S. Roweis",
  title = "E{M} algorithm for {PCA} and {SPCA}",
  booktitle = "Advances in Neural Information Processing Systems",
  volume = "10",
  pages = "626--632",
  editor = "M. I. Jordan and M. J. Kearns and S. A. Solla",
  publisher = "The MIT Press",
  address = "Cambridge, MA",
  year = 1998
}

% rumelhart95: the field name "editors" is not recognised; it must be "editor".
@InCollection{ rumelhart95,
  author = "D. E. Rumelhart and R. Durbin and R. Golden and Y. Chauvin",
  title = "Backpropagation: the basic theory",
  booktitle = "Backpropagation: Theory, Architectures and Applications",
  editor = "Y. Chauvin and D. E. Rumelhart",
  year = 1995,
  publisher = "Lawrence Erlbaum Associates",
  address = "Hillsdale, NJ",
  volume = "",
  number = "",
  pages = "1--34"
}

@Article{ sakakibara94nar,
  title = "Stochastic Context-Free Grammars for t{RNA} modeling",
  author = {Y. Sakakibara and M. Brown and R. Hughey and I. S. Mian and K. Sj{\"o}lander and R. C. Underwood and D. Haussler},
  journal = "Nucl.
Acids Res.",
  year = "1994",
  volume = 22,
  pages = "5112--5120"
}

% sarukkai00 and sarwar00: byte-identical repeated records were removed.
@Article{ sarukkai00,
  author = "R. R. Sarukkai",
  title = "Link prediction and path analysis using {M}arkov chains",
  journal = "Computer Networks",
  year = 2000,
  volume = "33",
  publisher = "",
  address = "",
  number = "",
  editor = "",
  pages = "377--386"
}

@InCollection{ sarwar00,
  author = "B. M. Sarwar and G. Karypis and J. A. Konstan and J. T. Riedl",
  title = "Analysis of recommender algorithms for e-commerce",
  booktitle = "Proceedings of the 2nd ACM Conference on Electronic Commerce",
  volume = "",
  publisher = "ACM Press",
  address = "New York",
  number = "",
  editor = "",
  pages = "158--167",
  year = 2000
}

@Article{ saul94,
  author = "L. Saul and M. I. Jordan",
  title = "Learning in {B}oltzmann Trees",
  journal = "Neural Computation",
  year = 1994,
  volume = "6",
  number = "6",
  pages = "1174--1184"
}

@InCollection{ saul95,
  author = "L. Saul and M. I. Jordan",
  title = "Boltzmann chains and hidden {M}arkov models",
  booktitle = "Advances in Neural Information Processing Systems",
  volume = "7",
  note = "(IEEE)",
  editor = "J. D. Cowan and G. Tesauro and J. Alspector",
  publisher = "Morgan Kaufmann",
  address = "San Francisco, CA",
  year = 1995
}

@InCollection{ saul96,
  author = "L. K. Saul and M. I.
Jordan",
  title = "Exploiting tractable substructures in intractable networks",
  booktitle = "Advances in Neural Information Processing Systems",
  editor = "D. S. Touretzky and M. C. Mozer and M. E. Hasselmo",
  publisher = "MIT Press",
  address = "Cambridge, MA",
  volume = "8",
  pages = "486--492",
  year = 1996
}

% saul99 and schafer01: byte-identical repeated records were removed.
@Article{ saul99,
  author = "Lawrence K. Saul and Michael I. Jordan",
  title = "Mixed Memory {M}arkov Models: Decomposing Complex Stochastic Processes as Mixtures of Simpler Ones",
  journal = "Machine Learning",
  volume = "37",
  number = "1",
  pages = "75--87",
  year = "1999"
}

@Book{ savage72,
  author = "L. J. Savage",
  title = "The foundations of statistics",
  year = 1972,
  note = "(First Edition in 1954)",
  publisher = "Dover",
  address = "New York"
}

@Article{ schafer01,
  author = "J. B. Schafer and J. A. Konstan and J. Riedl",
  title = "E-commerce recommendation applications",
  journal = "Journal of Data Mining and Knowledge Discovery",
  year = 2001,
  volume = "5",
  number = "",
  pages = "115--153",
  note = ""
}

@InCollection{ scholkopf95,
  author = "B. Scholkopf and C. Burges and V. Vapnik",
  title = "Extracting support data for a given task",
  booktitle = "Proceedings First International Conference on Knowledge Discovery and Data Mining",
  volume = "",
  note = "",
  editor = "U. M. Fayyad and R. Uthurusamy",
  publisher = "AAAI Press",
  address = "Menlo Park, CA",
  year = 1995
}

@Unpublished{ scholkopf96,
  author = "B. Scholkopf and A. Smola and K.
Muller",
  title = "Nonlinear component analysis as a kernel eigenvalue problem",
  note = "Technical Report No. 44",
  publisher = "Max-Planck-Institut fur Biologische Kybernetik",
  address = "Tubingen",
  year = 1996
}

@Article{ scholkopf98,
  author = "B. Scholkopf and A. Smola and K. R. Muller",
  title = "Nonlinear component analysis as a kernel eigenvalue problem",
  journal = "Neural Computation",
  year = 1998,
  volume = "10",
  number = "",
  pages = "1299--1319"
}

% sen02: a byte-identical repeated record was removed; journal name corrected.
@Article{ sen02,
  author = "R. Sen and M. H. Hansen",
  title = "Predicting a {W}eb user's next request based on log data",
  journal = "Journal of Computational and Graphical Statistics",
  year = 2002,
  volume = "",
  number = "",
  pages = "",
  note = "To appear"
}

@Article{ shachter88,
  author = "R. D. Shachter",
  title = "Probabilistic inference and influence diagrams",
  journal = "Operations Research",
  year = 1988,
  volume = "36",
  pages = "589--604"
}

@InProceedings{ shachter94,
  author = "R. D. Shachter and S. K. Anderson and P. Szolovits",
  title = "Global conditioning for probabilistic inference in belief networks",
  booktitle = "Proceedings of the Uncertainty in AI Conference",
  year = 1994,
  volume = "",
  publisher = "Morgan Kaufmann",
  address = "San Francisco, CA",
  pages = "514--522"
}

@InProceedings{ shahabi01,
  author = "Cyrus Shahabi and Farnoush Banaei-Kashani and Jabed Faruque",
  title = "A Framework for Efficient and Anonymous Web Usage Mining Based on Client-Side Tracking",
  booktitle = "Lecture Notes in Artificial Intelligence (no 2356): Proceedings of WEBKDD 2001",
  pages = "113--144",
  publisher = "Springer-Verlag",
  year = "2001"
}

@Article{ shannon48,
  author = "C. E.
Shannon",
  title = "A mathematical theory of communication",
  journal = "Bell System Technical Journal",
  year = 1948,
  volume = "27",
  number = "",
  pages = "379--423, 623--656"
}

% shardanand95 and silverstein98: byte-identical repeated records were removed.
@InCollection{ shardanand95,
  author = "U. Shardanand and P. Maes",
  title = "Social information filtering: algorithms for automating `word of mouth'",
  booktitle = "Proceedings of the Conference on Human Factors in Computing Systems",
  volume = "",
  publisher = "",
  address = "",
  number = "",
  editor = "",
  pages = "210--217",
  year = 1995
}

@Article{ shore80,
  author = "J. E. Shore and R. W. Johnson",
  title = "Axiomatic Derivation of the Principle of Maximum Entropy and the Principle of Minimum Cross-Entropy",
  journal = "IEEE Trans. Info. Theory",
  year = 1980,
  volume = "26",
  pages = "26--37"
}

@Article{ silverstein98,
  author = "C. Silverstein and M. Henzinger and H. Marais and M.
Moricz",
  title = "Analysis of a very large {A}lta{V}ista query log",
  journal = "",
  year = 1998,
  volume = "",
  number = "",
  pages = "",
  note = "Digital System Research Center, Technical Note 1998-14"
}

% slonim-unsupervised: three records shared this key (one Misc with only a url,
% two identical InProceedings); merged into one InProceedings with the url.
% NOTE(review): the page range equals that of slonim00document below - verify
% it against the SIGIR 2002 proceedings.
@InProceedings{ slonim-unsupervised,
  author = "Noam Slonim and Nir Friedman and Naftali Tishby",
  title = "Unsupervised Document Classification Using Sequential Information Maximization",
  booktitle = "Proceedings of the 25th International Conference on Research and Development in Information Retrieval (SIGIR)",
  pages = "208--215",
  publisher = "ACM Press",
  year = "2002",
  url = "citeseer.nj.nec.com/slonim02unsupervised.html"
}

% slonim00document: a byte-identical repeated record was removed.
@InProceedings{ slonim00document,
  author = "Noam Slonim and Naftali Tishby",
  title = "Document clustering using word clusters via the information bottleneck method",
  booktitle = "Proceedings of the 23rd International Conference on Research and Development in Information Retrieval (SIGIR)",
  pages = "208--215",
  publisher = "ACM Press",
  year = "2000",
  url = "citeseer.nj.nec.com/slonim00document.html"
}

@Article{
smith91,
  author = "A. F. M. Smith",
  title = "Bayesian computational methods",
  journal = "Phil. Trans. R. Soc. London A",
  year = 1991,
  volume = "337",
  pages = "369--386"
}

@Article{ smith93,
  author = "A. F. Smith and G. O. Roberts",
  title = "Bayesian computation via the {G}ibbs sampler and related {M}arkov chain {M}onte {C}arlo methods",
  journal = "J. R. Statis. Soc.",
  year = 1993,
  volume = "55",
  pages = "3--23"
}

% smyth97: a byte-identical repeated record was removed.
@Article{ smyth97,
  author = "P. Smyth and D. Heckerman and M. I. Jordan",
  title = "Probabilistic independence networks for hidden {M}arkov probability models",
  journal = "Neural Computation",
  year = 1997,
  volume = "9",
  pages = "227--267"
}

@Book{ smythdatamining01,
  title = "Principles of Data Mining",
  author = "D. Hand and H. Mannila and P. Smyth",
  year = 2001,
  publisher = "MIT Press",
  address = "Cambridge, MA",
  volume = "",
  number = "",
  pages = ""
}

% smythe95: author surname corrected from "Mahmound"; stray spaces in the
% title and pages trimmed.
@Article{ smythe95,
  author = "R. Smythe and H. Mahmoud",
  title = "A survey of recursive trees",
  journal = "Theoretical Probability and Mathematical Statistics",
  year = 1995,
  volume = "51",
  number = "",
  pages = "1--27"
}

@Article{ soderland99,
  author = "Stephen Soderland",
  title = "Learning Information Extraction Rules for Semi-Structured and Free Text",
  journal = "Machine Learning",
  volume = "34",
  number = "1-3",
  pages = "233--272",
  year = "1999",
  url = "citeseer.nj.nec.com/soderland99learning.html"
}

@InCollection{ sollich96,
  author = "P. Sollich and A. Krogh",
  title = "Learning with ensembles: how over-fitting can be useful",
  booktitle = "Advances in Neural Information Processing Systems",
  volume = "8",
  note = "",
  editor = "D. S. Touretzky and M. C. Mozer and M. E. Hasselmo",
  publisher = "MIT Press",
  address = "Cambridge, MA",
  year = 1996
}

@Article{ spencer,
  author = "J.
Spencer and L. Sacks",
  title  = "{IP} network topology and the impact of underlying transport networks"
}
% NOTE(review): the spencer record above has no journal and no year, so
% BibTeX will warn about missing required fields; the venue needs to be
% looked up and filled in.

@Article{ sperduti97,
  author  = "A. Sperduti and A. Starita",
  title   = "Supervised neural networks for the classification of structures",
  journal = "IEEE Transactions on Neural Networks",
  year    = 1997,
  volume  = "8",
  number  = "3",
  pages   = "714--735"
}

@Article{ spiegelhalter93,
  author  = "D. J. Spiegelhalter and A. P. Dawid and S. L. Lauritzen and R. G. Cowell",
  title   = "Bayesian analysis in expert systems",
  journal = "Stat. Sci.",
  year    = 1993,
  volume  = "8",
  pages   = "219--283"
}

@Article{ spink02,
  author  = "A. Spink and B. J. Jansen and D. Wolfram and T. Saracevic",
  title   = "From e-sex to e-commerce: {W}eb search changes",
  journal = "IEEE Computer",
  year    = 2002,
  volume  = "35",
  number  = "3",
  pages   = "107--109"
}

@Article{ spitzer71,
  author  = "F. Spitzer",
  title   = "Markov random fields and {G}ibbs ensembles",
  journal = "Am. Math. Monthly",
  year    = 1971,
  volume  = "78",
  pages   = "142--154"
}

@Book{ sself01,
  author    = "P. Baldi",
  title     = "The Shattered Self--the End of Natural Evolution",
  year      = 2001,
  publisher = "The MIT Press",
  address   = "Cambridge, MA"
}

@Article{ swendsen87,
  author  = "R. H. Swendsen and J. S. Wang",
  title   = "Nonuniversal critical dynamics in {M}onte {C}arlo simulations",
  journal = "Phys. Rev. Lett.",
  year    = 1987,
  volume  = "58",
  pages   = "86--88"
}

@Article{ taga59,
  author = "Y. Taga and K.
Isii",
  title   = "On a stochastic model concerning the pattern of communication-diffusion of news in a social group",
  journal = "Annals of the Institute of Statistical Mathematics",
  year    = 1959,
  volume  = "11",
  pages   = "25--43"
}

@Article{ tan02,
  author  = "P. Tan and V. Kumar",
  title   = "Discovery of {W}eb robot sessions based on their navigational patterns",
  journal = "Data Mining and Knowledge Discovery",
  year    = 2002,
  volume  = "6",
  pages   = "9--35"
}

% NOTE(review): no page range is recorded for tantrum02.
@InProceedings{ tantrum02,
  author    = "Jeremy Tantrum and Alejandro Murua and Werner Stuetzle",
  title     = "Hierarchical model-based clustering of large datasets through fractionation and refractionation",
  booktitle = "Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining",
  publisher = "ACM Press",
  address   = "New York",
  year      = "2002"
}

@Article{ tarjan84,
  author = "R. E. Tarjan and M. Yannakakis",
  title  = "Simple linear-time algorithms to test the chordality of graphs, test acyclicity of hypergraphs,
and selectively reduce acyclic hypergraphs",
  journal = "SIAM Journal on Computing",
  year    = 1984,
  volume  = "13",
  number  = "3",
  pages   = "566--579"
}

% NOTE(review): no page range is recorded for taskar02.
@InProceedings{ taskar02,
  author    = "B. Taskar and P. Abbeel and D. Koller",
  title     = "Discriminative probabilistic models for relational data",
  booktitle = "Proceedings of the Eighteenth Conference on Uncertainty in Artificial Intelligence",
  publisher = "Morgan Kaufmann",
  address   = "San Francisco, CA",
  year      = 2002
}

@InCollection{ tauscher97,
  author    = "L. Tauscher and S. Greenberg",
  title     = "Revisitation patterns in {W}orld {W}ide {W}eb navigation",
  booktitle = "Proceedings of the Conference on Human Factors in Computing Systems {CHI}'97",
  publisher = "ACM Press",
  address   = "New York",
  pages     = "97--137",
  year      = 1997
}

@Article{ tedeschi00,
  author  = "B. Tedeschi",
  title   = "Easier to use sites would help e-tailers close more sales",
  journal = "New York Times",
  year    = 2000,
  note    = "June 12"
}

@Article{ thodeberg96,
  author = "H. H.
Thodberg",
  title   = "A review of {B}ayesian neural networks with an application to near infrared spectroscopy",
  journal = "IEEE Transactions on Neural Networks",
  year    = 1996,
  volume  = "7",
  pages   = "56--72"
}
% The author's surname above is spelled Thodberg; the citation key keeps its
% historical spelling so existing cite commands continue to resolve.

@Article{ tierney94,
  author  = "L. Tierney",
  title   = "Markov chains for exploring posterior distributions",
  journal = "Ann. Statis.",
  year    = 1994,
  volume  = "22",
  pages   = "1701--1762"
}

@InCollection{ tishby01a,
  author    = "N. Tishby and N. Slonim",
  title     = "Data clustering by {M}arkovian relaxation and the information bottleneck method",
  booktitle = "Neural Information Processing Systems (NIPS 2000)",
  volume    = "13",
  editor    = "T. Leen and T. Dietterich and V. Tresp",
  publisher = "MIT Press",
  address   = "Cambridge, MA",
  year      = 2001
}

% NOTE(review): tishby01b has no publisher or page range recorded.
@InCollection{ tishby01b,
  author    = "N. Slonim and N. Tishby",
  title     = "The power of word clustering for text classification",
  booktitle = "Proceedings of the European Colloquium on IR Research, ECIR 2001",
  year      = 2001
}

@InCollection{ tishby99,
  author    = "N. Tishby and F. Pereira and W. Bialek",
  title     = "The information bottleneck method",
  booktitle = "Proceedings of the 37th Annual Allerton Conference on Communication, Control, and Computing",
  editor    = "B. Hajek and R. S. Sreenivas",
  publisher = "University of Illinois",
  year      = 1999,
  pages     = "368--377"
}

@Book{ titterington85,
  author    = "D. M. Titterington and A. F. M. Smith and U. E. Makov",
  title     = "Statistical Analysis of Finite Mixture Distributions",
  publisher = "John Wiley \& Sons",
  address   = "New York",
  year      = 1985
}

@Article{ travers69,
  author  = "J. Travers and S. Milgram",
  title   = "An experimental study of the small world problem",
  journal = "Sociometry",
  year    = 1969,
  volume  = "32",
  pages   = "425"
}

@InCollection{ ungar98,
  author = "L. H. Ungar and D. P.
Foster",
  title     = "Clustering methods for collaborative filtering",
  booktitle = "Proceedings of the Workshop on Recommendation Systems at the Fifteenth National Conference on Artificial Intelligence",
  publisher = "AAAI Press",
  address   = "Menlo Park, CA",
  year      = 1998
}

@Book{ vapnik95,
  author    = "V. Vapnik",
  title     = "The Nature of Statistical Learning Theory",
  year      = 1995,
  publisher = "Springer Verlag",
  address   = "New York"
}

@Article{ viterbi67,
  author  = "A. J. Viterbi",
  title   = "Error bounds for convolutional codes and an asymptotically optimum decoding algorithm",
  journal = "IEEE Transactions on Information Theory",
  year    = 1967,
  volume  = "IT-13",
  pages   = "260--269"
}

@Book{ vonheijne87,
  author    = "G. von Heijne",
  title     = "Sequence analysis in molecular biology: treasure trove or trivial pursuit",
  year      = 1987,
  publisher = "Academic Press",
  address   = "San Diego, CA"
}

@Article{ watts02,
  author  = "D. J. Watts and P. S. Dodds and M. E. J. Newman",
  title   = "Identity and search in social networks",
  journal = "Science",
  year    = 2002,
  volume  = "296",
  pages   = "1302--1305"
}

@Article{ watts98,
  author = {D. J. Watts and S. H.
Strogatz},
  title   = {Collective dynamics of `small-world' networks},
  journal = {Nature},
  volume  = {393},
  pages   = {440--442},
  year    = {1998}
}

@Book{ weigend94,
  title     = "Time Series Prediction: Forecasting the Future and Understanding the Past",
  author    = "A. S. Weigend and N. A. Gershenfeld",
  publisher = "Addison Wesley",
  year      = 1994
}

@Article{ weiss00,
  author  = "Y. Weiss",
  title   = "Correctness of local probability propagation in graphical models with loops",
  journal = "Neural Computation",
  year    = 2000,
  volume  = "12",
  pages   = "1--41"
}

@Article{ white70,
  author  = "H. White",
  title   = "Search parameters for the small world problem",
  journal = "Social Forces",
  year    = 1970,
  volume  = "49",
  pages   = "259"
}

@Book{ whittaker90,
  author    = "J. Whittaker",
  title     = "Graphical models in applied multivariate statistics",
  year      = 1990,
  publisher = "John Wiley \& Sons",
  address   = "New York"
}

@Article{ wiens99,
  author  = "B. L. Wiens",
  title   = "When log-normal and gamma models give different results: a case study",
  journal = "The American Statistician",
  year    = 1999,
  volume  = "53",
  pages   = "89--93"
}

@InCollection{ williams96,
  author    = "C. K. I. Williams and C. E. Rasmussen",
  title     = "Gaussian processes for regression",
  booktitle = "Advances in Neural Information Processing Systems",
  volume    = "8",
  editor    = "D. S. Touretzky and M. C. Mozer and M. E. Hasselmo",
  publisher = "MIT Press",
  address   = "Cambridge, MA",
  year      = 1996
}

@Article{ wolpert92,
  author  = "D. Wolpert",
  title   = "Stacked generalization",
  journal = "Neural Networks",
  year    = 1992,
  volume  = "5",
  number  = "2",
  pages   = "241--259"
}

@InCollection{ xie02,
  author = "Y. Xie and D.
O'Hallaron",
  title     = "Locality in search engine queries and its implications for caching",
  booktitle = "Proceedings of IEEE Infocom 2002",
  publisher = "IEEE Press",
  pages     = "1238--1247",
  year      = 2002
}

@InCollection{ xu96,
  author    = "L. Xu",
  title     = "A unified learning scheme: {B}ayesian-{K}ullback {Y}ing-{Y}ang machine",
  booktitle = "Advances in Neural Information Processing Systems",
  volume    = "8",
  editor    = "D. S. Touretzky and M. C. Mozer and M. E. Hasselmo",
  publisher = "MIT Press",
  address   = "Cambridge, MA",
  year      = 1996
}

% NOTE(review): journal, volume and pages of yedidia00 are identical to those
% recorded in this file for the weiss00 article (Neural Computation 12, 1--41)
% and look copied from it -- confirm the actual venue of this paper.
@Article{ yedidia00,
  author  = "J. S. Yedidia and W. T. Freeman and Y. Weiss",
  title   = "Generalized belief propagation",
  journal = "Neural Computation",
  year    = 2000,
  volume  = "12",
  pages   = "1--41"
}

@Article{ york92,
  author  = "J. York",
  title   = "Use of the {G}ibbs sampler in expert systems",
  journal = "Artif. Intell.",
  year    = 1992,
  volume  = "56",
  pages   = "115--130"
}

% zamir98web: one record only (the key was previously defined three times);
% the most complete variant is the one kept.
@InProceedings{ zamir98web,
  author = "Oren Zamir and Oren
Etzioni",
  title     = "Web Document Clustering: A Feasibility Demonstration",
  booktitle = "Proceedings of the 21st International Conference on Research and Development in Information Retrieval (SIGIR)",
  pages     = "46--54",
  year      = "1998",
  publisher = "ACM Press",
  url       = "citeseer.nj.nec.com/zamir98web.html"
}

% Volume 42 of The American Statistician appeared in 1988 (volume 53 is 1999,
% see wiens99), so the year is 1988; the citation key is left unchanged so
% existing cite commands continue to resolve.
@Article{ zellner98,
  author  = "A. Zellner",
  title   = "Optimal information processing and {B}ayes's theorem",
  journal = "The American Statistician",
  year    = 1988,
  volume  = "42",
  number  = "4",
  pages   = "278--284"
}

% zhang02 refers to the Freenet paper only; the Zhang and Iyengar article is
% available under zhangiyengar02.
% NOTE(review): zhang02 has no publisher or page range recorded.
@InCollection{ zhang02,
  author    = "H. Zhang and A. Goel and R. Govindan",
  title     = "Using the small-world model to improve {F}reenet performance",
  booktitle = "Proceedings of the IEEE Infocom Conference",
  year      = 2002
}

@Article{ zhangiyengar02,
  author  = "T. Zhang and V. S. Iyengar",
  title   = "Recommender systems using linear classifiers",
  journal = "Journal of Machine Learning Research",
  year    = 2002,
  volume  = "2",
  pages   = "313--334"
}

@InCollection{ zhu01,
  author    = "X. Zhu and J. Yu and J. Doyle",
  title     = "Heavy tails, generalized coding, and optimal {W}eb layout",
  booktitle = "Proceedings of the 2001 IEEE INFOCOM Conference",
  volume    = "3",
  publisher = "IEEE",
  pages     = "1617--1626",
  year      = 2001
}

@InCollection{ zoubin98,
  author    = "Z. Ghahramani",
  title     = "Learning Dynamic {B}ayesian Networks",
  booktitle = "Adaptive Processing of Sequence and Data Structures",
  publisher = "Springer",
  year      = "1998",
  editor    = "C. L. Giles and M.
Gori",
  pages     = "168--197"
}

@InCollection{ zukerman99,
  author    = "I. Zukerman and D. W. Albrecht and A. E. Nicholson",
  title     = "Predicting user's requests on the {WWW}",
  booktitle = "UM99---Proceedings of the Seventh International Conference on User Modeling",
  publisher = "Springer",
  pages     = "275--284",
  year      = 1999
}