Learning Term Spaces based on Visual Feedback (bibtex)
% Conference paper in the DEXA'06 proceedings (IEEE Computer Society).
% NOTE(review): given names of "Neidhart, T" and "Wagner, R" are abbreviated in the
% source record; expand them to full names once confirmed against the publication.
% NOTE(review): `language` and `talktype` are not standard BibTeX fields; presumably
% they are consumed by the hosting bibliography tool -- confirm before removing.
@InProceedings{Granitzer2006a,
  author    = {Granitzer, Michael and Neidhart, T and Lux, Mathias},
  booktitle = {Proceedings of the 17th International Conference on Database and Expert Systems Applications ({DEXA}'06)},
  title     = {Learning Term Spaces based on Visual Feedback},
  year      = {2006},
  address   = {Los Alamitos, CA, USA},
  editor    = {Tjoa, A Min and Wagner, R},
  month     = sep,
  pages     = {176--180},
  publisher = {IEEE Computer Society},
  abstract  = {Extracting and visualizing concepts and relationship between text documents strongly depends on the used similarity measure. In order to provide meaningful visualizations and to extract useful knowledge from document collections, user needs must be captured by the internal representation of documents, and the used similarity measure. In most applications the Vector Space Model and the Cosine similarity are used therefore and serve as good approximations. Nevertheless, influencing similarities between documents is rather hard, since parameter tuning relies heavily on expert knowledge of the underlying algorithms, and the influence of different weighting schemes and similarity measures is not known before. In this paper we present an approach on how to adapt the vector space representation of documents by giving visual feedback to the system. Our approach starts by clustering a corpus of text documents and visualizing the results using multi dimensional scaling techniques. Afterwards, a 2D landscape visualization is shown which can be manipulated by the user. Based on these manipulations the high dimensional representation of the documents is adapted to fit the users need more precisely. Our experiments show that iterating these steps results in an adapted representation of documents and similarities, generating layouts as intended by the user and furthermore increases clustering accuracy. While this paper only investigates the influence on clustering and visualization, the method itself may also be used for increasing classification and retrieval performance since it adapts to the users need of similarity.},
  language  = {EN},
  talktype  = {none},
}
Powered by bibtexbrowser (with ITEC extensions)