% Year: 2017 % Encoding: utf-8 @InProceedings{mluxmartina2017, author = {Pogorelov, Konstantin and Riegler, Michael and Halvorsen, Pal and Griwodz, Carsten and de Lange, Thomas and Randel, Kristin and Eskeland, Sigrun and Dang-Nguyen, Duc-Tien and Ostroukhova, Olga and Lux, Mathias and Spampinato, Concetto}, booktitle = {Working Notes Proceedings of the MediaEval 2017 Workshop}, title = {A Comparison of Deep Learning with Global Features for Gastrointestinal Disease Detection}, year = {2017}, address = {Dublin, Ireland}, editor = {Gravier, Guillaume and Bischke, Benjamin and Demarty, Claire-Hélène and Zaharieva, Maia and Riegler, Michael and Dellandrea, Emmanuel and Bogdanov, Dmitry and Sutcliffe, Richard and Jones, Gareth and Larson, Martha}, month = {sep}, pages = {3}, publisher = {CEUR Workshop Proceedings}, abstract = {This paper presents our approach for the 2017 Multimedia for Medicine Medico Task of the MediaEval 2017 Benchmark. We propose a system based on global features and deep neural networks, and preliminary results comparing the approaches are presented.}, language = {EN}, location = {Dublin, Ireland}, talkdate = {2017.09.14}, talktype = {registered}, url = {http://ceur-ws.org/Vol-1984/} } @InProceedings{mlux2017, author = {Riegler, Michael and Pogorelov, Konstantin and Halvorsen, Pal and Randel, Kristin and Eskeland, Sigrun and Dang-Nguyen, Duc-Tien and Lux, Mathias and Griwodz, Carsten and Spampinato, Concetto and de Lange, Thomas}, booktitle = {Working Notes Proceedings of the MediaEval 2017 Workshop}, title = {Multimedia for Medicine: The Medico Task at MediaEval 2017}, year = {2017}, address = {Dublin, Ireland}, editor = {Gravier, Guillaume and Bischke, Benjamin and Demarty, Claire-Hélène and Zaharieva, Maia and Riegler, Michael and Dellandrea, Emmanuel and Bogdanov, Dmitry and Sutcliffe, Richard and Jones, Gareth and Larson, Martha}, month = {sep}, pages = {3}, publisher = {CEUR Workshop Proceedings}, abstract = {The Multimedia for Medicine Medico Task, running for the first time as part of MediaEval 2017, focuses on detecting abnormalities, diseases and anatomical landmarks in images captured by medical devices in the gastrointestinal tract. The task characteristics are described, including the use case and its challenges, the dataset with ground truth, the required participant runs and the evaluation metrics.}, language = {EN}, location = {Dublin, Ireland}, talkdate = {2017.09.14}, talktype = {registered} } @InProceedings{martinadez2017, author = {Beck, Harald and Bierbaumer, Bruno and Dao-Tran, Minh and Eiter, Thomas and Hellwagner, Hermann and Schekotihin, Konstantin}, booktitle = {Communications (ICC), 2017 IEEE International Conference on}, title = {Stream Reasoning-Based Control of Caching Strategies in CCN Routers}, year = {2017}, address = {Paris, France}, editor = {Beylat, Jean Luc and Sari, Hikmet}, month = {may}, pages = {6}, publisher = {IEEE}, abstract = {Routers in Content-Centric Networking (CCN) may locally cache frequently requested content in order to speed up delivery to end users. Thus, the issue of caching strategies arises, i.e., which content shall be stored and when it should be replaced. In this work, we employ, and study the feasibility of, novel techniques towards intelligent control of CCN routers that autonomously switch between existing caching strategies in response to changing content request patterns.
In particular, we present a router architecture for CCN networks that is controlled by rule-based stream reasoning, following the recent formal framework LARS, which extends Answer Set Programming for streams. The obtained possibility for flexible router configuration at runtime allows for versatile network control schemes and may help advance the further development of CCN. Moreover, the empirical evaluation of our feasibility study shows that the resulting caching agent may give significant performance gains.}, doi = {10.1109/ICC.2017.7996762}, isbn10 = {978-1-4673-8999-0}, issn = {1938-1883}, keywords = {Cognition, Internet, Switches, Next generation networking, Programming, Computer architecture, Robots}, language = {EN}, location = {Paris}, talkdate = {2017.05.23}, talktype = {registered} } @InProceedings{martina2017, author = {Lux, Mathias and Riegler, Michael and Macstravic, Glenn}, booktitle = {ICMR '17 Proceedings of the 2017 ACM on International Conference on Multimedia Retrieval}, title = {LireSolr: A Visual Information Retrieval Server}, year = {2017}, address = {New York, New York, USA}, editor = {Sebe, Nicu and Ionescu, Bogdan}, month = {jun}, pages = {3}, publisher = {ACM}, abstract = {In this paper, we present LireSolr, an open source image retrieval server, built on top of the LIRE library and the Apache Solr search server. With LireSolr, visual information retrieval can be run on a server, which allows better distribution of workloads and simplifies applications in several areas including mobile and web. Furthermore, we showcase several example scenarios of how LireSolr can be used to point out the broad range of possibilities and applications. The system is easy to install and set up, and the large number of retrieval tools provided either by LIRE or by Apache Solr is made easily available on the search server. Moreover, our tool demonstrates how predictions from CNNs can easily be used to extend the visual information retrieval functionality.}, doi = {10.1145/3078971.3079014}, isbn10 = {978-1-4503-4701-3}, language = {EN}, location = {Bucharest, Romania}, talkdate = {2017.06.08}, talktype = {poster}, url = {https://dl.acm.org/citation.cfm?id=3079014} } @Article{Zabrovskiy2017a, author = {Zabrovskiy, Anatoliy and Petrov, Evgeny and Kuzmin, Evgeny and Timmerer, Christian}, journal = {arXiv.org [cs.MM]}, title = {Evaluation of the Performance of Adaptive {HTTP} Streaming Systems}, year = {2017}, month = {oct}, pages = {7}, volume = {abs/1710.02459}, abstract = {Adaptive video streaming over HTTP is becoming omnipresent in our daily life. In the past, dozens of research papers have proposed novel approaches to address different aspects of adaptive streaming and a decent number of player implementations (commercial and open source) are available. However, state-of-the-art evaluations are sometimes superficial as many proposals only investigate a certain aspect of the problem or focus on a specific platform – player implementations used in actual services are rarely considered. HTML5 is now available on many platforms and fosters the deployment of adaptive media streaming applications.
We propose a common evaluation framework for adaptive HTML5 players and demonstrate its applicability by evaluating eight different players which are actually deployed in real-world services.}, address = {N.N.}, language = {EN}, pdf = {https://www.itec.aau.at/bib/files/1710.02459.pdf}, publisher = {N.N.}, url = {http://arxiv.org/abs/1710.02459} } @InProceedings{Zabrovskiy2017, author = {Zabrovskiy, Anatoliy and Kuzmin, Evgeny and Petrov, Evgeny and Timmerer, Christian and Mueller, Christopher}, booktitle = {Proceedings of the 8th ACM on Multimedia Systems Conference (MMSys'17)}, title = {AdViSE: Adaptive Video Streaming Evaluation Framework for the Automated Testing of Media Players}, year = {2017}, address = {New York, NY, USA}, editor = {Chen, Kuan-Ta}, month = {jun}, pages = {4}, publisher = {ACM}, abstract = {Today we can observe a plethora of adaptive video streaming services and media players which support interoperable formats like DASH and HLS. Most of the players and their rate adaptation algorithms work as a black box. We have developed a system for easy and rapid testing of media players under various network scenarios. In this paper, we introduce AdViSE, the Adaptive Video Streaming Evaluation framework for the automated testing of adaptive media players. The presented framework is used for the comparison and testing of media players in the context of adaptive video streaming over HTTP in web/HTML5 environments. The demonstration showcases a series of experiments with different media players under given context conditions (e.g., network shaping, delivery format). We will also demonstrate the real-time capabilities of the framework and offline analysis including several QoE metrics with respect to a newly introduced bandwidth index.}, doi = {10.1145/3083187.3083221}, isbn10 = {978-1-4503-5002-0}, language = {EN}, location = {Taipei, Taiwan}, pdf = {https://www.itec.aau.at/bib/files/Demo_Paper_Camera_Ready.pdf}, talkdate = {2017.06.21}, talktype = {poster} } @Article{Timmerer2017i, author = {Timmerer, Christian}, journal = {IEEE Communications Standards Magazine}, title = {Immersive Media Delivery: Overview of Ongoing Standardization Activities}, year = {2017}, issn = {2471-2825}, month = {dec}, number = {4}, pages = {71--74}, volume = {1}, abstract = {More and more immersive media applications and services are emerging on the market, but lack international standards to enable interoperability.
This article provides an overview of ongoing standardization efforts in this exciting domain and highlights open research and standardization issues.}, address = {N.N.}, doi = {10.1109/MCOMSTD.2017.1700038}, language = {EN}, pdf = {https://www.itec.aau.at/bib/files/08258607.pdf}, publisher = {IEEE Communications Society} } @Article{Timmerer2017h, author = {Timmerer, Christian and Begen, Ali Cengiz}, journal = {Computing Now}, title = {Advancing Multimedia Content Distribution}, year = {2017}, month = {dec}, pages = {1}, address = {Los Alamitos, CA, USA}, language = {EN}, publisher = {IEEE Computer Society [online]}, url = {https://www.computer.org/web/computingnow/archive/advancing-multimedia-content-distribution-december-2017-introduction} } @Article{Timmerer2017g, author = {Timmerer, Christian}, journal = {SIGMultimedia Records}, title = {MPEG Column: 116th MPEG Meeting}, year = {2017}, issn = {1947-4598}, month = {jul}, number = {4}, pages = {N.N.}, volume = {8}, address = {New York, NY, USA}, doi = {10.1145/3129151.3129152}, language = {EN}, publisher = {ACM}, url = {http://doi.acm.org/10.1145/3129151.3129152} } @Article{Timmerer2017f, author = {Timmerer, Christian}, journal = {SIGMultimedia Records}, title = {MPEG Column: 117th MPEG Meeting}, year = {2017}, issn = {1947-4598}, month = {jul}, number = {1}, pages = {N.N.}, volume = {9}, address = {New York, NY, USA}, doi = {10.1145/3129151.3129153}, language = {EN}, publisher = {ACM}, url = {http://doi.acm.org/10.1145/3129151.3129153} } @Article{Timmerer2017e, author = {Timmerer, Christian}, journal = {SIGMultimedia Records}, title = {MPEG Column: 118th MPEG Meeting}, year = {2017}, issn = {1947-4598}, month = {oct}, number = {4}, pages = {N.N.}, volume = {8}, address = {New York, NY, USA}, doi = {10.1145/3149647.3149656}, language = {EN}, publisher = {ACM}, url = {http://doi.acm.org/10.1145/3149647.3149656} } @Article{Timmerer2017d, author = {Timmerer, Christian}, journal = {SIGMultimedia Records}, title = {MPEG Column: 119th MPEG Meeting in Turin, Italy}, year = {2017}, issn = {1947-4598}, month = {dec}, number = {2}, pages = {N.N.}, volume = {9}, address = {New York, NY, USA}, doi = {10.1145/3173058.3173061}, language = {EN}, publisher = {ACM}, url = {http://doi.acm.org/10.1145/3173058.3173061} } @Article{Timmerer2017c, author = {Timmerer, Christian}, journal = {SIGMultimedia Records}, title = {Report from ACM MMSys 2017}, year = {2017}, issn = {1947-4598}, month = {dec}, number = {2}, pages = {N.N.}, volume = {9}, address = {New York, NY, USA}, doi = {10.1145/3173058.3173068}, language = {EN}, publisher = {ACM}, url = {http://doi.acm.org/10.1145/3173058.3173068} } @InProceedings{Timmerer2017b, author = {Timmerer, Christian and Zabrovskiy, Anatoliy and Kuzmin, Evgeny and Petrov, Evgeny}, booktitle = {2017 21st Conference of Open Innovations Association (FRUCT)}, title = {Quality of experience of commercially deployed adaptive media players}, year = {2017}, address = {N.N.}, editor = {Balandin, Sergey}, month = {nov}, pages = {330-335}, publisher = {N.N.}, abstract = {In the past decade we observed the transition from push-based, fully managed media streaming to pull-based, unmanaged adaptive HTTP streaming thanks to enhancements in media compression, network capacity, and client capabilities. Adaptive media players, specifically their algorithms, have been subject to research for a long time and have led to various approaches documented in the literature.
In the past years we witnessed more and more commercial deployments taking into account findings presented in scientific papers, but a quantitative evaluation and assessment of their performance is missing. In this paper, we propose means for the automated performance evaluation of commercially deployed adaptive media players with respect to i) objective, well-known metrics, such as bitrate, stalls, startup delay, and ii) derived/calculated metrics (instability, inefficiency, average bitrate) previously proposed in the literature. Additionally, we propose a new metric (Bandwidth index) to measure the effectiveness of bandwidth utilization, and together with existing QoE models for adaptive HTTP streaming (focusing on stalls, startup delay) we demonstrate its usefulness in this domain.}, doi = {10.23919/FRUCT.2017.8250200}, issn = {2305-7254}, language = {EN}, location = {Helsinki, Finland}, pdf = {https://www.itec.aau.at/bib/files/08250200.pdf}, talkdate = {2017.11.10}, talktype = {registered} } @Article{Timmerer2017a, author = {Timmerer, Christian and Begen, Ali Cengiz}, journal = {ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM)}, title = {Best Papers of the 2016 ACM Multimedia Systems (MMSys) Conference and Workshop on Network and Operating System Support for Digital Audio and Video (NOSSDAV) 2016}, year = {2017}, month = {jun}, number = {3s}, pages = {40:1--40:2}, volume = {13}, address = {New York, NY, USA}, doi = {10.1145/3084539}, language = {EN}, pdf = {https://www.itec.aau.at/bib/files/a40-timmerer.pdf}, publisher = {ACM Digital Library}, url = {http://doi.acm.org/10.1145/3084539} } @InProceedings{Timmerer2017_nab, author = {Timmerer, Christian and Graf, Mario and Mueller, Christopher}, booktitle = {2018 NAB Broadcast Engineering and IT Conference (BEITC)}, title = {Adaptive Streaming of VR/360-degree Immersive Media Services with high QoE}, year = {2017}, address = {Washington DC, USA}, editor = {available, not}, month = {apr}, pages = {5}, publisher = {National Association of Broadcasters (NAB)}, language = {EN}, pdf = {https://www.itec.aau.at/bib/files/TimmererC012317.pdf}, talktype = {none} } @PhdThesis{Taschwer2017, author = {Taschwer, Mario}, school = {Alpen-Adria-Universit{\"a}t Klagenfurt}, title = {Concept-Based and Multimodal Methods for Medical Case Retrieval}, year = {2017}, address = {Austria}, month = {mar}, abstract = {Medical case retrieval (MCR) is defined as a multimedia retrieval problem, where the document collection consists of medical case descriptions that pertain to particular diseases, patients' histories, or other entities of biomedical knowledge. Case descriptions are multimedia documents containing textual and visual modalities (images). A query may consist of a textual description of a patient's symptoms and related diagnostic images. This thesis proposes and evaluates methods that aim at improving MCR effectiveness over the baseline of fulltext retrieval. We hypothesize that this objective can be achieved by utilizing controlled vocabularies of biomedical concepts for query expansion and concept-based retrieval. The latter represents case descriptions and queries as vectors of biomedical concepts, which may be generated automatically from textual and/or visual modalities by concept mapping algorithms.
We propose a multimodal retrieval framework for MCR by late fusion of text-based retrieval (including query expansion) and concept-based retrieval and show that retrieval effectiveness can be improved by 49% using linear fusion of practical component retrieval systems. The potential of further improvement is experimentally estimated as a 166% increase of effectiveness over fulltext retrieval using query-adaptive fusion of ideal component retrieval systems. Additional contributions of this thesis include the proposal and comparative evaluation of methods for concept mapping, query and document expansion, and automatic classification and separation of compound figures found in case descriptions.}, language = {EN}, pages = {200}, pdf = {https://www.itec.aau.at/bib/files/phd-thesis-taschwer.pdf} } @InProceedings{SchoeffmannTutorialACMMM2017, author = {Schoeffmann, Klaus and Münzer, Bernd and Riegler, Michael and Halvorsen, Paal}, booktitle = {MM ’17 Proceedings of the 2017 ACM on Multimedia Conference}, title = {Medical Multimedia Information Systems (MMIS)}, year = {2017}, address = {New York, NY, USA}, editor = {Liu, Qiong and Lienhart, Rainer and Wang, Haohong}, month = {oct}, pages = {1957-1958}, publisher = {ACM}, abstract = {In hospitals all around the world, medical multimedia information systems have gained high importance over the last few years. One of the reasons is that an increasing number of interventions are performed in a minimally invasive way. These endoscopic inspections and surgeries are performed with a tiny camera -- the endoscope -- which produces a video signal that is used to control the intervention. Apart from the viewing purpose, the video signal is also used for automatic content analysis during the intervention as well as for post-surgical usage, such as communicating operation techniques, planning future interventions, and medical forensics. Another reason is video documentation, which is even enforced by law in some countries. The problem, however, is the sheer amount of unstructured medical videos that are added to the multimedia archive on a daily basis. Without proper management and a multimedia information system, the medical videos cannot be used efficiently for post-surgical scenarios. It is therefore already foreseeable that medical multimedia information systems will gain even more traction in the next few years.
In this tutorial we will introduce the audience to this challenging new field, describe the domain-specific characteristics and challenges of medical multimedia data, introduce related use cases, and talk about existing works -- contributed by the medical imaging and robotics community, but also already partly from the multimedia community -- as well as the many open issues and challenges that bear high research potential.}, doi = {10.1145/3123266.3130142}, isbn10 = {978-1-4503-4906-2}, keywords = {endoscopic video, medical image processing, medical multimedia}, language = {EN}, location = {Mountain View, CA}, talkdate = {2017.10.27}, talktype = {registered}, url = {https://dl.acm.org/citation.cfm?id=3130142} } @Article{Schoeffmann2017MTAPHusslein, author = {Schoeffmann, Klaus and Husslein, Heinrich and Kletz, Sabrina and Petscharnig, Stefan and Münzer, Bernd and Beecks, Christian}, journal = {Multimedia Tools and Applications}, title = {Video Retrieval in Laparoscopic Video Recordings with Dynamic Content Descriptors}, year = {2017}, month = {nov}, pages = {18}, address = {USA}, language = {EN}, publisher = {Springer US} } @InProceedings{Schoeffmann2017MMM, author = {Schoeffmann, Klaus and Primus, Manfred Jürgen and Muenzer, Bernd and Petscharnig, Stefan and Karisch, Christoph and Xu, Qing and Huerst, Wolfgang}, booktitle = {MultiMedia Modeling: 23rd International Conference, MMM 2017, Reykjavik, Iceland, January 4-6, 2017, Proceedings, Part II}, title = {Collaborative Feature Maps for Interactive Video Search}, year = {2017}, address = {Cham}, editor = {Amsaleg, Laurent and Guðmundsson, Gylfi Þór and Gurrin, Cathal and Jónsson, Björn Þór and Satoh, Shin’ichi}, month = {jan}, pages = {457-462}, publisher = {Springer International Publishing}, abstract = {This extended demo paper summarizes our interface used for the Video Browser Showdown (VBS) 2017 competition, where visual and textual known-item search (KIS) tasks, as well as ad-hoc video search (AVS) tasks in a 600-h video archive need to be solved interactively. To this end, we propose a very flexible distributed video search system that combines many ideas of related work in a novel and collaborative way, such that several users can work together and explore the video archive in a complementary manner. The main interface is a perspective Feature Map, which shows keyframes of shots arranged according to a selected content similarity feature (e.g., color, motion, semantic concepts, etc.). This Feature Map is accompanied by additional views, which allow users to search and filter according to a particular content feature. For collaboration of several users we provide a cooperative heatmap that shows a synchronized view of inspection actions of all users. 
Moreover, we use collaborative re-ranking of shots (in specific views) based on retrieved results of other users.}, doi = {10.1007/978-3-319-51814-5_41}, language = {EN}, location = {Reykjavik, Iceland}, talkdate = {2017.01.04}, talktype = {registered}, url = {https://link.springer.com/chapter/10.1007/978-3-319-51814-5_41#copyrightInformation} } @InProceedings{Schoeffmann2017CHIIR, author = {Hopfgartner, Frank and Schoeffmann, Klaus}, booktitle = {Proceedings of the 2017 Conference on Human Information Interaction and Retrieval (CHIIR'17)}, title = {Interactive Search in Video \& Lifelogging Repositories}, year = {2017}, address = {New York, NY, USA}, editor = {Nordlie, Ragnar and Pharo, Nils}, month = {mar}, pages = {421-423}, publisher = {ACM}, abstract = {Due to increasing possibilities to create digital video, we are facing the emergence of large video archives that are made accessible either online or offline. Though a lot of research has been devoted to video retrieval tools and methods, which allow for automatic search in videos, the performance of automatic video retrieval is still far from optimal. At the same time, the organization of personal data is receiving increasing research attention due to the challenges that are faced in gathering, enriching, searching and visualizing this data. Given the increasing quantities of personal data being gathered by individuals, the concept of a heterogeneous personal digital library of rich multimedia and sensory content for every individual is becoming a reality. Despite the differences between video archives and personal lifelogging libraries, we are facing very similar challenges when accessing these multimedia repositories. For example, users will struggle to find the information they are looking for in either collection if they are not able to formulate their search needs through a query. In this tutorial we discussed (i) proposed solutions for improved video \& lifelog content navigation, (ii) typical interaction of content-based querying features, and (iii) advanced content visualization methods. Moreover, we discussed and demonstrated interactive video \& lifelog search systems and ways to evaluate their performance.}, doi = {10.1145/3020165.3022161}, isbn10 = {978-1-4503-4677-1}, language = {EN}, location = {Oslo}, talkdate = {2017.03.07}, talktype = {registered}, url = {https://dl.acm.org/citation.cfm?id=3022161} } @InProceedings{Schatz2017, author = {Schatz, Raimund and Sackl, Andreas and Timmerer, Christian and Gardlo, Bruno}, booktitle = {2017 Ninth International Conference on Quality of Multimedia Experience (QoMEX)}, title = {Towards Subjective Quality of Experience Assessment for Omnidirectional Video Streaming}, year = {2017}, address = {New York, USA}, editor = {Raake, Alexander}, month = {jun}, pages = {6}, publisher = {IEEE}, abstract = {Currently, we witness dramatically increasing interest in immersive media technologies like Virtual Reality (VR), particularly in omnidirectional video (OV) streaming. Omnidirectional (also called 360-degree) videos are panoramic spherical videos in which the user can look around during playback and which therefore can be understood as hybrids between traditional movie streaming and interactive VR worlds. Unfortunately, streaming this kind of content is extremely bandwidth intensive (compared to traditional 2D video) and therefore, Quality of Experience (QoE) tends to deteriorate significantly in the absence of continuous optimal bandwidth conditions.
In this paper, we present a first approach towards subjective QoE assessment for omnidirectional video (OV) streaming. We present the results of a lab study on the QoE impact of stalling in the context of OV streaming using head-mounted displays (HMDs). Our findings show that subjective testing for immersive media like OV is not trivial, with even simple cases like stalling leading to unexpected results. After a discussion of characteristic pitfalls and lessons learned, we provide a set of recommendations for upcoming OV assessment studies.}, doi = {10.1109/QoMEX.2017.7965657}, isbn10 = {978-1-5386-4024-1}, issn = {2472-7814}, language = {EN}, location = {Erfurt, Germany}, pdf = {https://www.itec.aau.at/bib/files/QoMEX_2017_paper_44.pdf}, talkdate = {2017.06.01}, talktype = {registered} } @Article{Rainer2017a, author = {Rainer, Benjamin and Petscharnig, Stefan and Timmerer, Christian and Hellwagner, Hermann}, journal = {IEEE Transactions on Multimedia}, title = {Statistically Indifferent Quality Variation: An Approach for Reducing Multimedia Distribution Cost for Adaptive Video Streaming Services}, year = {2017}, month = {mar}, pages = {13}, volume = {19}, abstract = {Forecasts predict that Internet traffic will continue to grow in the near future. A huge share of this traffic is caused by multimedia streaming. The Quality of Experience (QoE) of such streaming services is an important aspect and in most cases the goal is to maximize the bit rate, which -- in some cases -- conflicts with the requirements of both consumers and providers. For example, in mobile environments users may prefer a lower bit rate to stay within their data plan. Likewise, providers aim at minimizing bandwidth usage in order to reduce costs by transmitting less data to users while maintaining a high QoE. Today's adaptive video streaming services try to serve users with the highest bit rates, which consequently results in high QoE. In practice, however, some of these high bit rate representations may not differ significantly in terms of perceived video quality compared to lower bit rate representations. In this paper, we present a novel approach to determine the statistically indifferent quality variation (SIQV) of adjacent video representations for adaptive video streaming services by adopting standard objective quality metrics and existing QoE models. In particular, whenever the quality variation between adjacent representations is imperceptible from a statistical point of view, the representation with higher bit rate can be substituted with a lower bit rate representation. As expected, this approach results in savings with respect to bandwidth consumption while still providing a high QoE for users. The approach is evaluated subjectively with a crowdsourcing study.
Additionally, we highlight the benefits of our approach by providing a case study that extrapolates possible savings for providers.}, address = {New York, USA}, doi = {10.1109/TMM.2016.2629761}, keywords = {Adaptive Video Streaming, Quality of Experience, MPEG-DASH}, language = {EN}, publisher = {IEEE}, url = {http://ieeexplore.ieee.org/document/7745907/} } @InProceedings{PrimusTrecVID2017, author = {Primus, Manfred Jürgen and Münzer, Bernd and Schoeffmann, Klaus}, booktitle = {Proceedings of TRECVID 2017}, title = {ITEC-UNIKLU Ad-Hoc Video Search Submission 2017}, year = {2017}, address = {NIST, Gaithersburg, MD, USA}, editor = {Awad, George and Butt, Asad and Fiscus, Jonathan and Joy, David and Delgado, Andrew and Michel, Martial and Smeaton, Alan and Graham, Yvette and Kraaij, Wessel and Quénot, Georges and Eskevich, Maria and Ordelman, Roeland and Jones, Gareth and Huet, Benoit}, month = {nov}, pages = {10}, publisher = {NIST, USA}, abstract = {This paper describes our approach used for the fully automatic and manually assisted Ad-hoc Video Search (AVS) task for TRECVID 2017. We focus on the combination of different convolutional neural network models and query optimization. Each of these models focuses on a specific query part, which could be, e.g., location, objects, or the wide-ranging ImageNet classes. All classification results are collected in different combinations in Lucene indexes. For the manually assisted run we use a junk filter and different query optimization methods.}, language = {EN}, location = {Gaithersburg, MD, USA}, talkdate = {2017.11.13}, talktype = {poster} } @Article{Posch2017a, author = {Posch, Daniel and Rainer, Benjamin and Hellwagner, Hermann}, journal = {IEEE/ACM Transactions on Networking}, title = {SAF: Stochastic Adaptive Forwarding in Named Data Networking}, year = {2017}, month = {apr}, number = {2}, pages = {14}, volume = {25}, abstract = {Forwarding decisions in classical IP-based networks are predetermined by routing. This is necessary to avoid loops, inhibiting opportunities to implement an adaptive and intelligent forwarding plane. Consequently, content distribution efficiency is reduced due to a lack of inherent multi-path transmission. In Named Data Networking (NDN) instead, routing shall hold a supporting role to forwarding, providing sufficient potential to enhance content dissemination at the forwarding plane. In this paper we design, implement, and evaluate a novel probability-based forwarding strategy, called Stochastic Adaptive Forwarding (SAF) for NDN. SAF imitates a self-adjusting water pipe system, intelligently guiding and distributing Interests through network crossings circumventing link failures and bottlenecks. Just like real pipe systems, SAF employs overpressure valves enabling congested nodes to lower pressure autonomously. Through an implicit feedback mechanism it is ensured that the fraction of the traffic forwarded via congested nodes decreases. By conducting simulations we show that our approach outperforms existing forwarding strategies in terms of the Interest satisfaction ratio in the majority of the evaluated scenarios. This is achieved by extensive utilization of NDN's multipath and content-lookup capabilities without relying on the routing plane. SAF explores the local environment by redirecting requests that are likely to be dropped anyway.
This enables SAF to identify new paths to the content origin or to cached replicas, circumventing link failures and resource shortages without relying on routing updates.}, address = {New York, USA}, doi = {10.1109/TNET.2016.2614710}, language = {EN}, pdf = {https://www.itec.aau.at/bib/files/saf.pdf}, publisher = {IEEE}, url = {https://dx.doi.org/10.1109/TNET.2016.2614710} } @Article{Posch2017, author = {Posch, Daniel and Rainer, Benjamin and Hellwagner, Hermann}, journal = {Computer Communication Review}, title = {Towards a Context-Aware Forwarding Plane in Named Data Networking supporting QoS}, year = {2017}, month = {jan}, number = {1}, pages = {9}, volume = {47}, abstract = {The emergence of Information-Centric Networking (ICN) provides considerable opportunities for context-aware data distribution in the network's forwarding plane. While packet forwarding in classical IP-based networks is basically predetermined by routing, ICN foresees an adaptive forwarding plane considering the requirements of network applications. As research in this area is still at an early stage, most of the work so far focused on providing the basic functionality, rather than on considering the available context information to improve Quality of Service (QoS). This article investigates to which extent existing forwarding strategies take account of the available context information and can therefore increase service quality. The article examines a typical scenario encompassing different user applications (Voice over IP, video streaming, and classical data transfer) with varying demands (context), and evaluates how well the applications' requirements are met by the existing strategies.}, address = {New York, USA}, doi = {10.1145/3041027.3041029}, language = {EN}, pdf = {https://www.itec.aau.at/bib/files/ccr.pdf}, publisher = {ACM SIGCOMM}, url = {https://dl.acm.org/citation.cfm?doid=3041027.3041029} } @Article{Pinheiro2017, author = {Pinheiro, Antonio and Timmerer, Christian}, journal = {SIGMultimedia Records}, title = {Standards Column: JPEG and MPEG}, year = {2017}, issn = {1947-4598}, month = {oct}, number = {1}, pages = {N.N.}, volume = {9}, address = {New York, NY, USA}, doi = {10.1145/3149647.3149648}, language = {EN}, publisher = {ACM}, url = {http://doi.acm.org/10.1145/3149647.3149648} } @InProceedings{Petscharnig_2017_DS, title = {Semi-Automatic Retrieval of Relevant Segments from Laparoscopic Surgery Videos}, author = {Petscharnig, Stefan}, booktitle = {Proceedings of the 2017 ACM on International Conference on Multimedia Retrieval}, year = {2017}, address = {New York, NY, USA}, editor = {Ionescu, Bogdan and Sebe, Nicu}, month = {jun}, pages = {484--488}, publisher = {ACM}, series = {ICMR '17}, abstract = {Over the last decades, progress in medical technology and imaging technology has enabled the technique of minimally invasive surgery. In addition, multimedia technologies allow for retrospective analyses of surgeries. The accumulated videos and images allow for a speed-up in documentation, easier medical case assessment across surgeons, and training of young surgeons, and they are also used in medical research. Considering a surgery lasting for hours of routine work, surgeons only need to see short video segments of interest to assess a case. Surgeons do not have the time to manually extract video sequences of their surgeries from their big multimedia databases as they do not have the resources for this time-consuming task.
The thesis deals with the question of how to semantically classify video frames using Convolutional Neural Networks into different semantic concepts of surgical actions and anatomical structures. In order to achieve this goal, the capabilities of predefined CNN architectures and transfer learning in the laparoscopic video domain are investigated. The results are expected to improve by domain-specific adaptation of the CNN input layers, i.e. by fusion of the image with motion and relevance information. Finally, the thesis investigates to what extent surgeons' needs are covered with the proposed extraction of relevant scenes.}, doi = {10.1145/3078971.3079008}, keywords = {endoscopic image classification, endoscopic video retrieval}, language = {EN}, location = {Bucharest, Romania}, talkdate = {2017.06.08}, talktype = {registered}, url = {http://doi.acm.org/10.1145/3078971.3079008} } @InProceedings{PetscharnigSparDa2017, author = {Petscharnig, Stefan and Lux, Mathias and Chatzichristofis, Savvas}, booktitle = {15th International Workshop on Content-Based Multimedia Indexing}, title = {Dimensionality Reduction for Image Features using Deep Learning and Autoencoders}, year = {2017}, address = {New York, USA}, editor = {Bertini, Marco}, month = {jun}, pages = {.}, publisher = {ACM}, abstract = {The field of similarity-based image retrieval has experienced a game changer lately. Hand-crafted image features have been vastly outperformed by machine-learning-based approaches. Deep learning methods are very good at finding optimal features for a domain, given enough data is available to learn from. However, hand-crafted features are still a means to an end in domains where the data is not freely available, e.g. because it violates privacy or raises commercial concerns, or where it cannot be transmitted, e.g. due to bandwidth limitations. Moreover, we have to rely on hand-crafted methods whenever neural networks cannot be trained effectively, e.g. if there is not enough training data. In this paper, we investigate a particular approach to combine hand-crafted features and deep learning to (i) achieve early fusion of off-the-shelf hand-crafted global image features and (ii) reduce the overall number of dimensions to combine both worlds. This method allows for fast image retrieval in domains where training data is sparse.}, doi = {10.1145/3095713.3095737}, isbn10 = {978-1-4503-5333-5}, language = {EN}, location = {Firenze, Italy}, talkdate = {2017.06.21}, talktype = {registered}, url = {https://dl.acm.org/citation.cfm?id=3095737} } @InProceedings{PetscharnigMMM17, author = {Petscharnig, Stefan and Schoeffmann, Klaus}, booktitle = {International Conference on Multimedia Modeling}, title = {Deep Learning of Shot Classification in Gynecologic Surgery Videos}, year = {2017}, address = {Cham}, editor = {Amsaleg, Laurent and Guðmundsson, Gylfi Þór and Gurrin, Cathal and Jónsson, Björn Þór and Satoh, Shin’ichi}, month = {jan}, pages = {702-713}, publisher = {Springer}, abstract = {In the last decade, advances in endoscopic surgery resulted in vast amounts of video data which is used for documentation, analysis, and education purposes. In order to find video scenes relevant for the aforementioned purposes, physicians manually search and annotate hours of endoscopic surgery videos. This process is tedious and time-consuming, thus motivating the (semi-)automatic annotation of such surgery videos.
In this work, we want to investigate whether the single-frame model for semantic surgery shot classification is feasible and useful in practice. We approach this problem by further training of AlexNet, an already pre-trained CNN architecture. Thus, we are able to transfer knowledge gathered from the ImageNet database to the medical use case of shot classification in endoscopic surgery videos. We annotate hours of endoscopic surgery videos for training and testing data. Our results imply that the CNN-based single-frame classification approach is able to provide useful suggestions to medical experts while annotating video scenes. Hence, the annotation process is improved. Future work shall consider the evaluation of more sophisticated classification methods incorporating the temporal video dimension, which is expected to improve on the baseline evaluation done in this work.}, edition = {LNCS 10132}, keywords = {Multimedia content analysis, Convolutional neural networks, Deep learning, Medical shot classification}, language = {EN}, location = {Klagenfurt, Austria}, talkdate = {2017.01.05}, talktype = {registered}, url = {https://link.springer.com/chapter/10.1007/978-3-319-51811-4_57} } @InProceedings{PetscharnigME17, author = {Petscharnig, Stefan and Schoeffmann, Klaus and Lux, Mathias}, booktitle = {Working Notes Proceedings of the MediaEval 2017 Workshop}, title = {An Inception-like CNN Architecture for GI Disease and Anatomical Landmark Classification}, year = {2017}, address = {Dublin, Ireland}, editor = {Gravier, Guillaume and Bischke, Benjamin and Demarty, Claire-Hélène and Zaharieva, Maia and Riegler, Michael and Dellandrea, Emmanuel and Bogdanov, Dmitry and Sutcliffe, Richard and Jones, Gareth and Larson, Martha}, month = {oct}, pages = {1--3}, publisher = {CEUR-WS}, abstract = {In this working note, we describe our approach to gastrointestinal disease and anatomical landmark classification for the Medico task at MediaEval 2017. We propose an inception-like CNN architecture and a fixed-crop data augmentation scheme for training and testing. The architecture is based on GoogLeNet and designed to keep the number of trainable parameters and its computational overhead small. Preliminary experiments show that the architecture is able to learn the classification problem from scratch using a tiny fraction of the provided training data only.}, language = {EN}, location = {Dublin, Ireland}, talkdate = {2017.09.15}, talktype = {registered}, url = {http://slim-sig.irisa.fr/me17/} } @Article{Petscharnig2017, author = {Petscharnig, Stefan and Schoeffmann, Klaus}, journal = {Multimedia Tools and Applications}, title = {Learning laparoscopic video shot classification for gynecological surgery}, year = {2017}, issn = {1573-7721}, month = {apr}, pages = {1-19}, abstract = {Videos of endoscopic surgery are used for education of medical experts, analysis in medical research, and documentation for everyday clinical life. Hand-crafted image descriptors lack the capabilities of a semantic classification of surgical actions and video shots of anatomical structures. In this work, we investigate how well single-frame convolutional neural networks (CNN) for semantic shot classification in gynecologic surgery work. Together with medical experts, we manually annotate hours of raw endoscopic gynecologic surgery videos showing endometriosis treatment and myoma resection of over 100 patients. The cleaned ground truth dataset comprises 9 h of annotated video material (from 111 different recordings).
We use the well-known CNN architectures AlexNet and GoogLeNet and train these architectures for both surgical actions and anatomy from scratch. Furthermore, we extract high-level features from AlexNet with weights from a pre-trained model from the Caffe model zoo and feed them to an SVM classifier. Our evaluation shows that we reach an average recall of .697 and .515 for classification of anatomical structures and surgical actions, respectively, using off-the-shelf CNN features. Using GoogLeNet, we achieve a mean recall of .782 and .617 for classification of anatomical structures and surgical actions, respectively. With AlexNet, the achieved recall is .615 for anatomical structures and .469 for surgical action classification. The main conclusion of our work is that advances in general image classification methods transfer to the domain of endoscopic surgery videos in gynecology. This is relevant as this domain is different from natural images, e.g. it is distinguished by smoke, reflections, or a limited number of colors.}, address = {Berlin, Heidelberg, New York}, doi = {10.1007/s11042-017-4699-5}, keywords = {Video classification, Deep learning, Convolutional Neural Network}, language = {EN}, publisher = {Springer}, url = {http://dx.doi.org/10.1007/s11042-017-4699-5} } @InProceedings{Muenzer2017c, author = {Münzer, Bernd and Primus, Manfred Jürgen and Kletz, Sabrina and Petscharnig, Stefan and Schoeffmann, Klaus}, booktitle = {IEEE International Symposium on Multimedia (ISM2017)}, title = {Static vs. Dynamic Content Descriptors for Video Retrieval in Laparoscopy}, year = {2017}, address = {Taichung, Taiwan}, editor = {Chang, Kang-Ming and Chang, Wen-Thong}, month = {dec}, pages = {8}, publisher = {IEEE}, abstract = {The domain of minimally invasive surgery has recently attracted attention from the Multimedia community due to the fact that systematic video documentation is on the rise in this medical field. The vastly growing volumes of video archives demand effective and efficient techniques to retrieve specific information from large video collections with visually very homogeneous content. One specific challenge in this context is to retrieve scenes showing similar surgical actions, i.e., similarity search. Although this task has a high and constantly growing relevance for surgeons and other health professionals, it has rarely been investigated in the literature so far for this particular domain. In this paper, we propose and evaluate a number of both static and dynamic content descriptors for this purpose. The former only take into account individual images, while the latter consider the motion within a scene. Our experimental results show that although static descriptors achieve the highest overall performance, dynamic descriptors are much more discriminative for certain classes of surgical actions.
We conclude that the two approaches have complementary strengths and further research should investigate methods to combine them.}, language = {EN}, location = {Taichung, Taiwan}, talkdate = {2017.12.12}, talktype = {registered} } @InProceedings{Muenzer2017b, author = {Münzer, Bernd and Schoeffmann, Klaus and Böszörmenyi, Laszlo}, booktitle = {IEEE International Symposium on Multimedia (ISM2017)}, title = {EndoXplore: A Web-based Video Explorer for Endoscopic Videos}, year = {2017}, address = {Taichung, Taiwan}, editor = {Chang, Kang-Ming and Chang, Wen-Thong}, month = {dec}, pages = {2}, publisher = {IEEE}, abstract = {The rapidly increasing volume of videos recorded in the course of endoscopic screenings and surgeries poses demanding challenges to video retrieval and browsing systems. Surgeons typically have to use standard video players to retrospectively review their procedures, which is an extremely cumbersome and time-consuming process. We present an HTML5-based video explorer that is specially tailored to this purpose and enables a time-efficient post-operative review of procedures. It incorporates various interactive browsing mechanisms as well as domain-specific content-based features based on previous research results. Preliminary interviews with surgeons indicate that this tool can considerably improve retrieval and browsing efficiency for users in the medical domain and allows surgeons to more easily and quickly revisit specific moments in recordings of their endoscopic surgeries.}, language = {EN}, location = {Taichung, Taiwan}, talkdate = {2017.12.11}, talktype = {poster} } @InProceedings{Muenzer2017a, author = {Münzer, Bernd and Primus, Manfred Jürgen and Hudelist, Marco and Beecks, Christian and Hürst, Wolfgang and Schoeffmann, Klaus}, booktitle = {2017 IEEE International Conference on Multimedia \& Expo Workshops (ICMEW)}, title = {When content-based video retrieval and human computation unite: Towards effective collaborative video search}, year = {2017}, address = {Hong Kong, China}, editor = {Chan, Yui-Lam and Rahardja, Susanto}, month = {jul}, pages = {214-219}, publisher = {IEEE}, abstract = {Although content-based retrieval methods have achieved very good results for large-scale video collections in recent years, they still suffer from various deficiencies. On the other hand, plain human perception is a very powerful ability that still outperforms automatic methods in appropriate settings, but is very limited when it comes to large-scale data collections. In this paper, we propose to take the best from both worlds by combining an advanced content-based retrieval system featuring various query modalities with a straightforward mobile tool that is optimized for fast human perception in a sequential manner. In this collaborative system with multiple users, both subsystems benefit from each other: The results of issued queries are used to re-rank the video list on the tablet tool, which in turn notifies the retrieval tool about parts of the dataset that have already been inspected in detail and can be omitted in subsequent queries.
The preliminary experiments show promising results in terms of search performance.}, doi = {10.1109/ICMEW.2017.8026262}, language = {EN}, location = {Hong Kong}, talkdate = {2017.07.10}, talktype = {registered} } @Article{Muenzer2017, author = {Münzer, Bernd and Schoeffmann, Klaus and Böszörmenyi, Laszlo}, journal = {Multimedia Tools and Applications}, title = {Content-based processing and analysis of endoscopic images and videos: A survey}, year = {2017}, month = {jan}, pages = {1-40}, abstract = {In recent years, digital endoscopy has become established as a key technology for medical screenings and minimally invasive surgery. Since then, various research communities with manifold backgrounds have picked up on the idea of processing and automatically analyzing the inherently available video signal that is produced by the endoscopic camera. Proposed works mainly include image processing techniques, pattern recognition, machine learning methods and Computer Vision algorithms. While most contributions deal with real-time assistance at procedure time, the post-procedural processing of recorded videos is still in its infancy. Many post-processing problems are based on typical Multimedia methods like indexing, retrieval, summarization and video interaction, but have only been sparsely addressed so far for this domain. The goals of this survey are (1) to introduce this research field to a broader audience in the Multimedia community to stimulate further research, (2) to describe domain-specific characteristics of endoscopic videos that need to be addressed in a pre-processing step, and (3) to systematically bring together the very diverse research results for the first time to provide a broader overview of related research that is currently not perceived as belonging together.}, address = {Berlin, Heidelberg, New York}, doi = {10.1007/s11042-016-4219-z}, language = {EN}, publisher = {Springer}, url = {https://link.springer.com/article/10.1007/s11042-016-4219-z} } @Misc{Mueller2017, author = {Mueller, Christopher and Lederer, Stefan and Timmerer, Christian}, howpublished = {Patent}, month = {jun}, note = {US 15365886}, title = {Adaptation logic for varying a bitrate}, year = {2017}, url = {https://patents.google.com/patent/US20170188069A1/en} } @InProceedings{Moll2017a, author = {Moll, Philipp and Posch, Daniel and Hellwagner, Hermann}, booktitle = {Proceedings of the IEEE International Conference on Multimedia and Expo Workshops (ICMEW) 2017}, title = {Investigation of push-based traffic for conversational services in Named Data Networking}, year = {2017}, address = {Hong Kong}, editor = {Pesquet-Popescu, Beatrice and Ngo, Chong-Wah}, month = {jul}, pages = {315-320}, publisher = {IEEE}, abstract = {Conversational services (e.g., Internet telephony) exhibit hard Quality of Service (QoS) requirements, such as low delay and jitter. Current IP-based solutions for conversational services use push-based data transfer only, since pull-based communication as envisaged in Named Data Networking (NDN) suffers from the two-way delay. Unfortunately, IP's addressing scheme requires additional services for contacting communication partners. NDN provides an inherent solution for this issue by using a location-independent naming scheme. Nevertheless, it currently does not provide a mechanism for push-based data transfer. In this paper, we investigate Persistent Interests as a solution for push-based communication.
We improve and implement the idea of Persistent Interests, and study their applicability for conversational services in NDN. This is done by comparing different push- and pull-based approaches for Internet telephony.}, doi = {10.1109/ICMEW.2017.8026212}, isbn13 = {978-1-5386-0560-8}, language = {EN}, location = {Hong Kong}, pdf = {https://www.itec.aau.at/bib/files/MuSIC_2017.pdf}, talkdate = {2017.07.10}, talktype = {registered}, url = {http://ieeexplore.ieee.org/document/8026212/} } @InProceedings{Moll2017, author = {Moll, Philipp and Janda, Julian and Hellwagner, Hermann}, booktitle = {Proceedings of the 4th ACM Conference on Information-Centric Networking}, title = {Adaptive Forwarding of Persistent Interests in Named Data Networking}, year = {2017}, address = {New York, NY, USA}, editor = {Schmidt, Thomas C and Seedorf, Jan}, month = {sep}, pages = {180-181}, publisher = {ACM}, abstract = {Persistent Interests (PIs) are a promising approach to introduce push-type traffic in Named Data Networking (NDN), in particular for conversational services such as voice and video calls. Forwarding decisions for PIs are crucial in NDN because they establish a long-lived path for the data flowing back toward the PI issuer. In the course of studying the use of PIs in NDN, we investigate adaptive PI forwarding and present a strategy combining regular NDN forwarding information and results from probing potential alternative paths through the network. Simulation results indicate that our adaptive PI forwarding approach is superior to the PI-adapted Best Route strategy when network conditions change due to link failures.}, doi = {10.1145/3125719.3132091}, isbn13 = {978-1-4503-5122-5}, language = {EN}, location = {Berlin, Germany}, pdf = {https://www.itec.aau.at/bib/files/ACM-ICN-2017_Poster.pdf}, talkdate = {2017.09.27}, talktype = {registered}, url = {http://dl.acm.org/citation.cfm?id=3132091} } @InProceedings{Leibetseder2017c, author = {Leibetseder, Andreas and Münzer, Bernd and Schoeffmann, Klaus}, booktitle = {IEEE International Symposium on Multimedia (ISM2017)}, title = {A Tool for Endometriosis Annotation in Endoscopic Videos}, year = {2017}, address = {Taichung, Taiwan}, editor = {Chang, Kang-Ming and Chang, Wen-Thong}, month = {dec}, pages = {2}, publisher = {IEEE}, abstract = {When regarding physicians’ tremendously packed timetables, it comes as no surprise that they start managing even critical situations hastily in order to cope with the high demands laid out for them. Apart from treating patients’ conditions, they are also required to perform time-consuming administrative tasks, including post-surgery video analyses. Concerning documentation of minimally invasive surgeries (MIS), specifically endoscopy, such processes usually involve repeatedly perusing lengthy, in the worst case uncut recordings – a redundant task that nowadays can be optimized by using readily available technology: we present a tool for annotating endoscopic video frames targeting a specific use case – endometriosis, i.e.
the dislocation of uterine-like tissue.}, language = {EN}, location = {Taichung, Taiwan}, talkdate = {2017.12.11}, talktype = {poster} } @InProceedings{Leibetseder2017b, author = {Leibetseder, Andreas and Primus, Manfred Jürgen and Petscharnig, Stefan and Schoeffmann, Klaus}, booktitle = {Proceedings of the Thematic Workshops of ACM Multimedia 2017}, title = {Real-Time Image-based Smoke Detection in Endoscopic Videos}, year = {2017}, address = {New York, NY, USA}, editor = {Wu, Wanmin and Yang, Jianchao and Tian, Qi and Zimmermann, Roger}, month = {oct}, pages = {296--304}, publisher = {ACM}, series = {Thematic Workshops '17}, abstract = {The nature of endoscopy as a type of minimally invasive surgery (MIS) requires surgeons to perform complex operations by merely inspecting a live camera feed. Inherently, a successful intervention depends upon ensuring proper working conditions, such as skillful camera handling, adequate lighting and removal of confounding factors, such as fluids or smoke. The latter is an undesirable byproduct of cauterizing tissue and not only constitutes a health hazard for the medical staff as well as the treated patients, but can also considerably obstruct the operating physician's field of view. Therefore, as a standard procedure the gaseous matter is evacuated by using specialized smoke suction systems that typically are activated manually whenever considered appropriate. We argue that image-based smoke detection can be employed to undertake such a decision, while also being a useful indicator for relevant scenes in post-procedure analyses. This work represents a continuation of previously conducted studies utilizing pre-trained convolutional neural networks (CNNs) and threshold-based saturation analysis. Specifically, we explore further methodologies for comparison and provide as well as evaluate a public dataset comprising over 100K smoke/non-smoke images extracted from the Cholec80 dataset, which is composed of 80 different cholecystectomy procedures. Having applied deep learning to merely 20K images of a custom dataset, we achieve Receiver Operating Characteristic (ROC) curves enclosing areas of over 0.98 for custom datasets and over 0.77 for the public dataset.
Surprisingly, a fixed threshold for saturation-based histogram analysis still yields areas of over 0.78 and 0.75.}, doi = {10.1145/3126686.3126690}, isbn10 = {978-1-4503-5416-5}, keywords = {cnn classification, deep learning, endoscopic surgery, image processing, smoke detection}, language = {EN}, location = {Mountain View, California, USA}, talkdate = {2017.10.27}, talktype = {registered}, url = {http://doi.acm.org/10.1145/3126686.3126690} } @InProceedings{Leibetseder2017, author = {Leibetseder, Andreas and Primus, Manfred Jürgen and Petscharnig, Stefan and Schoeffmann, Klaus}, booktitle = {Computer Assisted and Robotic Endoscopy and Clinical Image-Based Procedures: 4th International Workshop, CARE 2017, and 6th International Workshop, CLIP 2017, Held in Conjunction with MICCAI 2017, Qu{\'e}bec City, QC, Canada, September 14, 2017, Proceedings}, title = {Image-Based Smoke Detection in Laparoscopic Videos}, year = {2017}, address = {Cham, Switzerland}, editor = {Cardoso, M Jorge and Arbel, Tal and Luo, Xiongbiao and Wesarg, Stefan and Reichl, Tobias and Gonzalez Ballester, Miguel Angel and McLeod, Jonathan and Drechsler, Klaus and Peters, Terry and Erdt, Marius and Mori, Kensaku and Linguraru, Marius George and Uhl, Andreas and Oyarzun Laura, Cristina and Shekhar, Raj}, month = {sep}, pages = {70--87}, publisher = {Springer International Publishing}, abstract = {The development and improper removal of smoke during minimally invasive surgery (MIS) can considerably impede a patient's treatment, while additionally entailing serious deleterious health effects. Hence, state-of-the-art surgical procedures employ smoke evacuation systems, which often still are activated manually by the medical staff or less commonly operate automatically utilizing industrial, highly-specialized and operating room (OR) approved sensors. As an alternate approach, video analysis can be used to take on said detection process -- a topic not yet much researched in the aforementioned context. In order to advance in this sector, we propose utilizing an image-based smoke classification task on a pre-trained convolutional neural network (CNN). We provide a custom data set of over 30 000 laparoscopic smoke/non-smoke images, part of which served as training data for GoogLeNet-based [41] CNN models. For comparative evaluation, we separately developed a non-CNN classifier based on observing the saturation channel of a sample picture in the HSV color space.
While the deep learning approaches yield excellent results with Receiver Operating Characteristic (ROC) curves enclosing areas of over 0.98, the computationally much less costly analysis of an image's saturation histogram can, under certain circumstances, surprisingly also be a good indicator for smoke, with areas under the curves (AUCs) of around 0.92--0.97.}, doi = {10.1007/978-3-319-67543-5_7}, edition = {LNCS}, language = {EN}, location = {Québec City, Canada}, talkdate = {2017.09.14}, talktype = {registered}, url = {https://doi.org/10.1007/978-3-319-67543-5_7} } @InProceedings{Kletz2017, author = {Kletz, Sabrina and Schoeffmann, Klaus and Münzer, Bernd and Primus, Manfred J and Husslein, Heinrich}, booktitle = {Proceedings of the First ACM Workshop on Educational and Knowledge Technologies (MultiEdTech 2017)}, title = {Surgical Action Retrieval for Assisting Video Review of Laparoscopic Skills}, year = {2017}, address = {Mountain View, California, USA}, editor = {Li, Qiong and Lienhart, Rainer and Wang, Hao Hong}, month = {oct}, pages = {9}, publisher = {ACM}, series = {MultiEdTech '17}, abstract = {An increasing number of surgeons promote video review of laparoscopic surgeries for the detection of technical errors at an early stage as well as for training purposes. The reason is that laparoscopic surgeries require specific psychomotor skills, which are difficult to learn and teach. The manual inspection of surgery video recordings is extremely cumbersome and time-consuming. Hence, there is a strong demand for automated video content analysis methods. In this work, we focus on retrieving surgical actions from video collections of gynecologic surgeries. We propose two novel dynamic content descriptors for similarity search and investigate a query-by-example approach to evaluate the descriptors on a manually annotated dataset consisting of 18 hours of video content. We compare several content descriptors, including descriptors capturing dynamic information of the segments as well as descriptors containing only spatial information of keyframes of the segments. The evaluation shows that our proposed dynamic content descriptors, which consider motion and spatial information from the segment, achieve better retrieval performance than static content descriptors that ignore the temporal information of the segment entirely. 
The proposed content descriptors in this work enable content-based video search for similar laparoscopic actions, which can be used to assist surgeons in evaluating laparoscopic surgical skills.}, doi = {10.1145/3132390.3132395}, keywords = {feature signatures, laparoscopic video, medical endoscopy, motion analysis, similarity search, video retrieval}, language = {EN}, location = {Mountain View, California, USA}, talkdate = {2017.10.27}, talktype = {registered}, url = {http://doi.acm.org/10.1145/3132390.3132395} } @InProceedings{Janetschek2017a, author = {Janetschek, Matthias and Prodan, Radu and Benedict, Shajulin}, title = {A Compiler Transformation-based Approach to Scientific Workflow Enactment}, booktitle = {Proceedings of the 12th Workshop on Workflows in Support of Large-Scale Science}, year = {2017}, pages = {1--12}, publisher = {ACM}, doi = {10.1145/3150994.3150999}, url = {https://dl.acm.org/citation.cfm?doid=3150994.3150999} } @InProceedings{Hurst2017, author = {Hürst, Wolfgang and Ip Vai Ching, Algernon and Schoeffmann, Klaus and Primus, Manfred Juergen}, booktitle = {MultiMedia Modeling: 23rd International Conference, MMM 2017, Reykjavik, Iceland, January 4-6, 2017, Proceedings, Part II}, title = {Storyboard-Based Video Browsing Using Color and Concept Indices}, year = {2017}, address = {Cham}, editor = {Amsaleg, Laurent and Guðmundsson, Gylfi Þór and Gurrin, Cathal and Jónsson, Björn Þór and Satoh, Shin’ichi}, month = {jan}, pages = {480--485}, publisher = {Springer International Publishing}, abstract = {We present an interface for interactive video browsing where users visually skim storyboard representations of the files in search of known items (known-item search tasks) and textually described subjects, objects, or events (ad-hoc search tasks). Individual segments of the video are represented as a color-sorted storyboard that can be addressed via a color index. Our storyboard representation is optimized for quick visual inspection, considering results from our ongoing research. In addition, a concept-based search is used to filter the storyboard to those parts containing the related concept(s), thus complementing the human visual inspection with a semantic, content-based annotation.}, language = {EN}, location = {Reykjavik, Iceland}, talkdate = {2017.01.04}, talktype = {registered} } @InProceedings{Hudelist2017Thumb, author = {Hudelist, Marco and Schoeffmann, Klaus}, booktitle = {International Conference on Multimedia Modeling}, title = {An Evaluation of Video Browsing on Tablets with the ThumbBrowser}, year = {2017}, address = {Cham}, editor = {Amsaleg, Laurent and Guðmundsson, Gylfi Þór and Gurrin, Cathal and Jónsson, Björn Þór and Satoh, Shin’ichi}, month = {jan}, pages = {89--100}, publisher = {Springer}, abstract = {We present an extension and evaluation of a novel interaction concept for video browsing on tablets. It can be argued that the best user experience for watching video on tablets can be achieved when the device is held in landscape orientation. Most mobile video players ignore this fact and make the interaction unnecessarily hard when the tablet is held with both hands. Naturally, in this hand posture only the thumbs are available for interaction. Our ThumbBrowser interface takes this into account and combines it in its latest iteration with content analysis information as well as two different interaction methods. The interface was already introduced in a basic form in earlier work. 
In this paper, we report on the extensions we applied and show first evaluation results in comparison to standard video players. We are able to show that our video browser is superior in terms of search accuracy and user satisfaction.}, doi = {10.1007/978-3-319-51814-5_8}, isbn10 = {978-3-319-51813-8}, language = {EN}, location = {Reykjavik, Iceland}, talkdate = {2017.01.05}, talktype = {registered} } @InProceedings{Hudelist2017, author = {Hudelist, Marco A and Husslein, Heinrich and Münzer, Bernd and Schoeffmann, Klaus}, booktitle = {Proceedings of the Third IEEE International Conference on Multimedia Big Data (BigMM 2017)}, title = {A Tool to Support Surgical Quality Assessment}, year = {2017}, address = {Laguna Hills, California, USA}, editor = {Chen, Shu-Ching and Sheu, Philip Chen-Yu}, month = {apr}, pages = {2}, publisher = {IEEE}, series = {BigMM'17}, abstract = {In the domain of medical endoscopy, an increasing number of surgeons nowadays store video recordings of their interventions in a huge video archive. Among other purposes, the videos are used for post-hoc surgical quality assessment, since objective assessment of surgical procedures has been identified as an essential component for the improvement of surgical quality. Currently, such assessment is performed manually and for selected procedures only, since the amount of data is huge and the manual interaction cumbersome and time-consuming. In the future, quality assessment should be carried out comprehensively and systematically by means of automated assessment algorithms. In this demo paper, we present a tool that supports human assessors in collecting manual annotations and therefore should help them deal with the huge amount of visual data more efficiently. These annotations will be analyzed and used as training data in the future.}, doi = {10.1109/BigMM.2017.45}, keywords = {data handling, endoscopes, medical image processing, surgery, video signal processing, automated assessment algorithms, human assessor support tool, intervention video recordings, manual annotation collection, medical endoscopy, post-hoc surgical quality assessment, surgical procedure assessment, surgical quality assessment support tool, video archive, visual data, Minimally invasive surgery, Navigation, Quality assessment, Tools, User interfaces, Video recording, generic error rating tool, medical multimedia, surgical quality assessment}, language = {EN}, location = {Laguna Hills, California, USA}, talkdate = {2017.04.21}, talktype = {poster}, url = {http://ieeexplore.ieee.org/document/7966750/} } @Article{HHmartina2017, author = {Zhu, Xiaoqing and Mao, Shiwen and Hassan, Mahbub and Hellwagner, Hermann}, journal = {IEEE Transactions on Multimedia}, title = {Guest Editorial: Video Over Future Networks}, year = {2017}, issn = {1941-0077}, month = {oct}, number = {10}, pages = {2133--2135}, volume = {19}, abstract = {The papers in this special issue focus on the deployment of video over future networks. The past decade has seen how major improvements in broadband and mobile networks have led to the widespread popularity of video streaming applications, and how the latter has become the major driving force behind exponentially growing Internet traffic. This special issue seeks to investigate these future Internet technologies through the prism of their most prevalent application, that of video communications. 
}, address = {Piscataway, NJ}, doi = {10.1109/TMM.2017.2743638}, keywords = {Special issues and sections, Streaming media, Mobile communication, Network architecture, Quality of experience, Ultra-high definition video}, language = {EN}, publisher = {IEEE}, url = {http://ieeexplore.ieee.org/document/8038904/} } @InProceedings{Graf2017, author = {Graf, Mario and Timmerer, Christian and Mueller, Christopher}, booktitle = {Proceedings of the 8th ACM on Multimedia Systems Conference (MMSys'17)}, title = {Towards Bandwidth Efficient Adaptive Streaming of Omnidirectional Video over HTTP: Design, Implementation, and Evaluation}, year = {2017}, address = {New York, NY, USA}, editor = {Chen, Kuan-Ta}, month = {jun}, pages = {11}, publisher = {ACM}, abstract = {Real-time entertainment services such as streaming audiovisual content deployed over the open, unmanaged Internet now account for more than 70% of Internet traffic during peak periods. More and more such bandwidth-hungry applications and services are being proposed, like immersive media services such as virtual reality and, specifically, omnidirectional/360-degree video. The adaptive streaming of omnidirectional video over HTTP imposes an important challenge on today's video delivery infrastructures, which calls for dedicated, thoroughly designed techniques for content generation, delivery, and consumption. This paper describes the usage of tiles -- as specified within modern video codecs such as HEVC/H.265 and VP9 -- to enable bandwidth-efficient adaptive streaming of omnidirectional video over HTTP, and we define various streaming strategies. Furthermore, the parameters and characteristics of a dataset for omnidirectional video are proposed and exemplarily instantiated to evaluate various aspects of such an ecosystem, namely bitrate overhead, bandwidth requirements, and quality aspects in terms of viewport PSNR. The results indicate bitrate savings from 40% (in a realistic scenario with recorded head movements from real users) up to 65% (in an ideal scenario with a centered/fixed viewport) and serve as a baseline and guidelines for advanced techniques, including the outline of a research roadmap for the near future.}, language = {EN}, location = {Taipei, Taiwan}, pdf = {https://www.itec.aau.at/bib/files/Special_Session_Camera_Ready.pdf}, talkdate = {2017.06.20}, talktype = {registered} } @InProceedings{Darragh2017, author = {Egan, Darragh and Keighrey, Conor and Barrett, John and Qiao, Yuansong and Brennan, Sean and Timmerer, Christian and Murray, Niall}, booktitle = {Proceedings of the 2nd International Workshop on Multimedia Alternate Realities}, title = {Subjective Evaluation of an Olfaction Enhanced Immersive Virtual Reality Environment}, year = {2017}, address = {New York, NY, USA}, editor = {Chambel, Teresa and Kaiser, Rene and Niamut, Omar Aziz and Ooi, Wei Tsang}, month = {oct}, pages = {15--18}, publisher = {ACM}, series = {AltMM '17}, abstract = {Recent research efforts have reported findings on user Quality of Experience (QoE) of immersive virtual reality (VR) experiences. Truly immersive multimedia experiences also include multisensory components such as olfactory and tactile stimuli, in addition to audiovisual stimuli. In this context, this paper reports the results of a user QoE study of an olfaction-enhanced immersive VR environment. The results presented compare the user QoE between two groups (VR vs VR + Olfaction) and consider how the addition of olfaction affected user QoE levels (considering sense of enjoyment, immersion and discomfort). 
Self-reported measures via a post-test questionnaire (10 questions) revealed only one statistically significant difference between the groups, namely in terms of how users felt with respect to their senses being stimulated. The presence of olfaction in the VR environment did not have a statistically significant effect in terms of user levels of enjoyment, immersion and discomfort.}, doi = {10.1145/3132361.3132363}, isbn13 = {978-1-4503-5507-0}, language = {EN}, pdf = {https://www.itec.aau.at/bib/files/p15-egan.pdf}, talktype = {none}, url = {http://doi.acm.org/10.1145/3132361.3132363} } @InProceedings{Borodulin2017, author = {Borodulin, Kirill and Radchenko, Gleb and Shestakov, Aleksandr and Sokolinsky, Leonid and Tchernykh, Andrey and Prodan, Radu}, title = {Towards Digital Twins Cloud Platform: Microservices and Computational Workflows to Rule a Smart Factory}, booktitle = {2017 IEEE/ACM 10th International Conference on Utility and Cloud Computing}, year = {2017}, pages = {209--210}, month = {dec}, publisher = {ACM} } @InProceedings{Beecks2017, author = {Beecks, Christian and Kletz, Sabrina and Schoeffmann, Klaus}, booktitle = {Proceedings of the Third IEEE International Conference on Multimedia Big Data (BigMM 2017)}, title = {Large-Scale Endoscopic Image and Video Linking with Gradient-Based Signatures}, year = {2017}, address = {Laguna Hills, California, USA}, editor = {Chen, Shu-Ching and Sheu, Philip Chen-Yu}, month = {apr}, pages = {5}, publisher = {IEEE}, series = {BigMM}, abstract = {Given a large-scale video archive of surgical interventions and a medical image showing a specific moment of an operation, how can the most image-related videos be found efficiently without the utilization of additional semantic characteristics? In this paper, we investigate a novel content-based approach of linking medical images with relevant video segments arising from endoscopic procedures. We propose to approximate the video segments' content-based features by gradient-based signatures and to index these signatures with the Minkowski distance in order to determine the most query-like video segments efficiently. We benchmark our approach on a large endoscopic image and video archive and show that our approach achieves a significant improvement in efficiency in comparison to the state of the art while maintaining high accuracy.}, doi = {10.1109/BigMM.2017.44}, keywords = {feature signatures, laparoscopic video, medical endoscopy, motion analysis, similarity search, video retrieval}, language = {EN}, location = {Laguna Hills, California, USA}, talkdate = {2017.04.19}, talktype = {registered}, url = {http://ieeexplore.ieee.org/document/7966709/} } @InProceedings{0f4de0c76764d43901677cf74a330af9, title = {{Nerthus: A Bowel Preparation Quality Video Dataset}}, author = {Pogorelov, Konstantin and Ranheim Randel, Kristin and de Lange, Thomas and Eskeland, Sigrun L. and Griwodz, Carsten and Spampinato, Concetto and Taschwer, Mario and Lux, Mathias and Schmidt, Peter T. and Riegler, Michael and Halvorsen, Pal}, booktitle = {Proceedings of the 8th ACM on Multimedia Systems Conference (MMSys 2017)}, year = {2017}, editor = {Chen, Kuan-Ta and Cesar, Pablo and Hsu, Cheng-Hsin}, month = {jun}, pages = {170--174}, publisher = {ACM}, abstract = {Bowel preparation (cleansing) is considered to be a key precondition for successful colonoscopy (endoscopic examination of the bowel). 
The degree of bowel cleansing directly affects the ability to detect diseases and may influence decisions on screening and follow-up examination intervals. An accurate assessment of bowel preparation quality is therefore important. Despite the use of reliable and validated bowel preparation scales, the grading may vary from one doctor to another. An objective and automated assessment of bowel cleansing would contribute to reducing such inequalities and to optimizing the use of medical resources. This would also be a valuable feature for automatic endoscopy reporting in the future. In this paper, we present Nerthus, a dataset containing videos from inside the gastrointestinal (GI) tract, showing different degrees of bowel cleansing. By providing this dataset, we invite multimedia researchers to contribute to the medical field by building systems that automatically evaluate the quality of bowel cleansing for colonoscopy. Such innovations would likely contribute to improving the medical field of GI endoscopy.}, doi = {10.1145/3083187.3083216}, url = {https://dl.acm.org/citation.cfm?id=3083216} }