% Supavadee Aramvith
% Encoding: utf-8

@Book{Schoeffmann2018b,
  title     = {MultiMedia Modeling - 24th International Conference, MMM 2018 (Part 2)},
  editor    = {Klaus Schöffmann and Thanarat H. Chalidabhongse and Chong-Wah Ngo and Supavadee Aramvith and Noel E. O'Connor and Yo-Sung Ho and Moncef Gabbouj and Ahmed Elgammal},
  publisher = {Springer},
  year      = {2018},
  month     = {January},
  volume    = {10705},
  doi       = {10.1007/978-3-319-73600-6},
  url       = {https://www.springer.com/de/book/9783319735993},
}

@Book{Schoeffmann2018a,
  title     = {MultiMedia Modeling - 24th International Conference, MMM 2018 (Part 1)},
  editor    = {Klaus Schöffmann and Thanarat H. Chalidabhongse and Chong-Wah Ngo and Noel E. O'Connor and Supavadee Aramvith and Yo-Sung Ho and Moncef Gabbouj and Ahmed Elgammal},
  publisher = {Springer},
  year      = {2018},
  month     = {January},
  volume    = {10704},
  doi       = {10.1007/978-3-319-73603-7},
  url       = {https://www.springer.com/de/book/9783319736020},
}

@InProceedings{Primus2018a,
  title     = {The ITEC Collaborative Video Search System at the Video Browser Showdown 2018},
  author    = {Manfred Jürgen Primus and Bernd Münzer and Andreas Leibetseder and Klaus Schöffmann},
  booktitle = {MultiMedia Modeling - 24th International Conference, MMM 2018 (Part 2)},
  year      = {2018},
  address   = {Berlin},
  editor    = {Klaus Schöffmann and Thanarat H. Chalidabhongse and Chong-Wah Ngo and Supavadee Aramvith and Noel E. O'Connor and Yo-Sung Ho and Moncef Gabbouj and Ahmed Elgammal},
  month     = {January},
  pages     = {438--443},
  publisher = {Springer},
  series    = {LNCS},
  volume    = {10705},
  abstract  = {We present our video search system for the Video Browser Showdown (VBS) 2018 competition. It is based on the collaborative system used in 2017, which already performed well but also revealed high potential for improvement. Hence, based on our experience we introduce several major improvements, particularly (1) a strong optimization of similarity search, (2) various improvements for concept-based search, (3) a new flexible video inspector view, and (4) extended collaboration features, as well as numerous minor adjustments and enhancements, mainly concerning the user interface and means of user interaction. Moreover, we present a spectator view that visualizes the current activity of the team members to the audience to make the competition more attractive.},
  doi       = {10.1007/978-3-319-73600-6_47},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-73600-6_47},
}

@InProceedings{Primus2018,
  title     = {Frame-Based Classification of Operation Phases in Cataract Surgery Videos},
  author    = {Manfred Jürgen Primus and Doris Putzgruber-Adamitsch and Mario Taschwer and Bernd Münzer and Yosuf El-Shabrawi and Laszlo Böszörmenyi and Klaus Schöffmann},
  booktitle = {MultiMedia Modeling - 24th International Conference, MMM 2018 (Part 1)},
  year      = {2018},
  address   = {Berlin},
  editor    = {Klaus Schöffmann and Thanarat H. Chalidabhongse and Chong-Wah Ngo and Noel E. O'Connor and Supavadee Aramvith and Yo-Sung Ho and Moncef Gabbouj and Ahmed Elgammal},
  month     = {January},
  pages     = {241--253},
  publisher = {Springer},
  series    = {LNCS},
  volume    = {10704},
  abstract  = {Cataract surgeries are frequently performed to correct a lens opacification of the human eye, which usually appears in the course of aging. These surgeries are conducted with the help of a microscope and are typically recorded on video for later inspection and educational purposes.
However, post-hoc visual analysis of video recordings is cumbersome and time-consuming for surgeons if there is no navigation support, such as bookmarks to specific operation phases. To prepare the way for an automatic detection of operation phases in cataract surgery videos, we investigate the effectiveness of a deep convolutional neural network (CNN) to automatically assign video frames to operation phases, which can be regarded as a single-label multi-class classification problem. In the absence of public datasets of cataract surgery videos, we provide a dataset of 21 videos of standardized cataract surgeries and use it to train and evaluate our CNN classifier. Experimental results show a mean F1-score of about 68% for frame-based operation phase classification, which can be further improved to 75% when considering temporal information of video frames in the CNN architecture.},
  doi       = {10.1007/978-3-319-73603-7_20},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-73603-7_20},
}

@InProceedings{Muenzer2018,
  title     = {Video Browsing on a Circular Timeline},
  author    = {Bernd Münzer and Klaus Schöffmann},
  booktitle = {MultiMedia Modeling - 24th International Conference, MMM 2018 (Part 2)},
  year      = {2018},
  address   = {Berlin},
  editor    = {Klaus Schöffmann and Thanarat H. Chalidabhongse and Chong-Wah Ngo and Supavadee Aramvith and Noel E. O'Connor and Yo-Sung Ho and Moncef Gabbouj and Ahmed Elgammal},
  month     = {January},
  pages     = {395--399},
  publisher = {Springer},
  series    = {LNCS},
  volume    = {10705},
  abstract  = {The emerging ubiquity of videos in all aspects of society demands innovative and efficient browsing and navigation mechanisms. We propose a novel visualization and interaction paradigm that replaces the traditional linear timeline with a circular timeline. The main advantages of this new concept are (1) significantly increased and dynamic navigation granularity, (2) minimized spatial distances between arbitrary points on the timeline, as well as (3) the possibility to efficiently utilize the screen space for bookmarks or other supplemental information associated with points of interest. The demonstrated prototype implementation proves the expedience of this new concept and includes additional navigation and visualization mechanisms, which altogether create a powerful video browser.},
  doi       = {10.1007/978-3-319-73600-6_40},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-73600-6_40},
}

@InProceedings{Leibetseder2018a,
  title     = {Sketch-Based Similarity Search for Collaborative Feature Maps},
  author    = {Andreas Leibetseder and Sabrina Kletz and Klaus Schöffmann},
  booktitle = {MultiMedia Modeling - 24th International Conference, MMM 2018 (Part 2)},
  year      = {2018},
  address   = {Berlin},
  editor    = {Klaus Schöffmann and Thanarat H. Chalidabhongse and Chong-Wah Ngo and Supavadee Aramvith and Noel E. O'Connor and Yo-Sung Ho and Moncef Gabbouj and Ahmed Elgammal},
  month     = {January},
  pages     = {425--430},
  publisher = {Springer},
  series    = {LNCS},
  volume    = {10705},
  abstract  = {Past editions of the annual Video Browser Showdown (VBS) event have brought forward many tools targeting a diverse range of techniques for interactive video search, among which sketch-based search showed promising results. Aiming at exploring this direction further, we present a custom approach for tackling the problem of finding similarities in the TRECVID IACC.3 dataset via hand-drawn pictures using color compositions together with contour matching.
The proposed methodology is integrated into the established Collaborative Feature Maps (CFM) system, which was first utilized in the VBS 2017 challenge.},
  doi       = {10.1007/978-3-319-73600-6_45},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-73600-6_45},
}

@InProceedings{Leibetseder2018,
  title     = {Automatic Smoke Classification in Endoscopic Video},
  author    = {Andreas Leibetseder and Manfred Jürgen Primus and Klaus Schöffmann},
  booktitle = {MultiMedia Modeling - 24th International Conference, MMM 2018 (Part 2)},
  year      = {2018},
  address   = {Berlin},
  editor    = {Klaus Schöffmann and Thanarat H. Chalidabhongse and Chong-Wah Ngo and Supavadee Aramvith and Noel E. O'Connor and Yo-Sung Ho and Moncef Gabbouj and Ahmed Elgammal},
  month     = {January},
  pages     = {362--366},
  publisher = {Springer},
  series    = {LNCS},
  volume    = {10705},
  abstract  = {Medical smoke evacuation systems enable proper, filtered removal of toxic fumes during surgery, while stabilizing internal pressure during endoscopic interventions. Typically activated manually, they are, however, prone to inefficient utilization: tardy activation enables smoke to interfere with ongoing surgeries and late deactivation wastes precious resources. In order to address such issues, in this work we demonstrate a vision-based tool indicating endoscopic smoke – a first step towards automatic activation of said systems and avoiding human misconduct. In the back-end we employ a pre-trained convolutional neural network (CNN) model for distinguishing images containing smoke from others.},
  doi       = {10.1007/978-3-319-73600-6_33},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-73600-6_33},
}

@InProceedings{Kletz2018,
  title     = {Evaluation of Visual Content Descriptors for Supporting Ad-Hoc Video Search Tasks at the Video Browser Showdown},
  author    = {Sabrina Kletz and Andreas Leibetseder and Klaus Schöffmann},
  booktitle = {MultiMedia Modeling - 24th International Conference, MMM 2018 (Part 1)},
  year      = {2018},
  address   = {Berlin},
  editor    = {Klaus Schöffmann and Thanarat H. Chalidabhongse and Chong-Wah Ngo and Noel E. O'Connor and Supavadee Aramvith and Yo-Sung Ho and Moncef Gabbouj and Ahmed Elgammal},
  month     = {January},
  pages     = {203--215},
  publisher = {Springer},
  series    = {LNCS},
  volume    = {10704},
  abstract  = {Since 2017 the Video Browser Showdown (VBS) has collaborated with TRECVID and interactively evaluates Ad-Hoc Video Search (AVS) tasks, in addition to Known-Item Search (KIS) tasks. In this video search competition the participants have to find relevant target scenes for a given textual query within a specific time limit, in a large dataset consisting of 600 h of video content. Since the number of relevant scenes for such an AVS query is usually rather high, the teams at the VBS 2017 could find only a small portion of them. One way to support them in the interactive search would be to automatically retrieve other similar instances of an already found target scene. However, it is unclear which content descriptors should be used for such an automatic video content search using a query-by-example approach. Therefore, in this paper we investigate several different visual content descriptors (CNN Features, CEDD, COMO, HOG, Feature Signatures and HOF) for the purpose of similarity search in the TRECVID IACC.3 dataset, used for the VBS.
Our evaluation shows that there is no single descriptor that works best for every AVS query; however, when considering the total performance over all 30 AVS tasks of TRECVID 2016, CNN features provide the best performance.},
  doi       = {10.1007/978-3-319-73603-7_17},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-73603-7_17},
}