% Year: 2021 % Encoding: utf-8 @InProceedings{Zabrovskiy2021c, author = {Anatoliy Zabrovskiy and Prateek Agrawal and Christian Timmerer and Radu Prodan}, booktitle = {2021 30th Conference of Open Innovations Association (FRUCT)}, title = {{FAUST: Fast Per-Scene Encoding Using Entropy-Based Scene Detection and Machine Learning}}, year = {2021}, month = {oct}, pages = {292--302}, publisher = {IEEE}, abstract = {HTTP adaptive video streaming is a widespread and sought-after technology on the Internet that allows clients to dynamically switch between different stream qualities presented in the bitrate ladder to optimize overall received video quality. Currently, there exist several approaches of different complexity for building such a ladder. The simplest method is to use a static bitrate ladder, and the more complex one is to compute a per-title encoding ladder. The main drawback of these approaches is that they do not provide bitrate ladders for scenes with different visual complexity within the video. Moreover, most modern methods require additional computationally-intensive test encodings of the entire video to construct the convex hull, used to calculate the bitrate ladder. This paper proposes a new fast per-scene encoding approach called FAUST based on 1) quick entropy-based scene detection and 2) prediction of optimized bitrate ladder for each scene using an artificial neural network. The results show that our model reduces the mean absolute error to 0.15, the mean square error to 0.08, and the bitrate to 13.5 % while increasing the difference in video multimethod assessment fusion to 5.6 points.}, doi = {10.23919/fruct53335.2021.9599963}, keywords = {Visualization, Technological innovation, Bit rate, Switches, Mean square error methods, Streaming media, Encoding}, url = {https://ieeexplore.ieee.org/document/9599963} } @Article{Verma2021, author = {Pawan Kumar Verma and Prateek Agrawal and Ivone Amorim and Radu Prodan}, journal = {IEEE Transactions on Computational Social Systems}, title = {{WELFake: Word Embedding Over Linguistic Features for Fake News Detection}}, year = {2021}, issn = {2329-924X}, month = {aug}, number = {4}, pages = {881--893}, volume = {8}, abstract = {Social media is a popular medium for the dissemination of real-time news all over the world. Easy and quick information proliferation is one of the reasons for its popularity. An extensive number of users with different age groups, gender, and societal beliefs are engaged in social media websites. Despite these favorable aspects, a significant disadvantage comes in the form of fake news, as people usually read and share information without caring about its genuineness. Therefore, it is imperative to research methods for the authentication of news. To address this issue, this article proposes a two-phase benchmark model named WELFake based on word embedding (WE) over linguistic features for fake news detection using machine learning classification. The first phase preprocesses the data set and validates the veracity of news content by using linguistic features. The second phase merges the linguistic feature sets with WE and applies voting classification. To validate its approach, this article also carefully designs a novel WELFake data set with approximately 72,000 articles, which incorporates different data sets to generate an unbiased classification output. 
Experimental results show that the WELFake model categorizes news as real or fake with 96.73% accuracy, which improves the overall accuracy by 1.31% compared to bidirectional encoder representations from transformer (BERT) and 4.25% compared to convolutional neural network (CNN) models. Our frequency-based and focused analyzing writing patterns model outperforms predictive-based related works implemented using the Word2vec WE method by up to 1.73%.}, doi = {10.1109/tcss.2021.3068519}, keywords = {Bidirectional encoder representations from transformer (BERT), convolutional neural network (CNN), fake news, linguistic feature, machine learning (ML), text classification, voting classifier, word embedding (WE)}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9395133} } @Article{Timmerer2021, author = {Christian Timmerer and Mathias Wien and Lu Yu and Amy Reibman}, journal = {Proceedings of the IEEE}, title = {{Special issue on Open Media Compression: Overview, Design Criteria, and Outlook on Emerging Standards}}, year = {2021}, issn = {1558-2256}, month = {sep}, number = {9}, pages = {1423--1434}, volume = {109}, abstract = {Universal access to and provisioning of multimedia content is now a reality. It is easy to generate, distribute, share, and consume any multimedia content, anywhere, anytime, on any device. Open media standards took a crucial role toward enabling all these use cases, leading to a plethora of applications and services that have now become a commodity in our daily life. Interestingly, most of these services adopt a streaming paradigm, are typically deployed over the open, unmanaged Internet, and account for most of today’s Internet traffic. Currently, the global video traffic is greater than 60% of all Internet traffic [1], and it is expected that this share will grow to more than 80% in the near future [2]. In addition, Nielsen’s law of Internet bandwidth states that the users’ bandwidth grows by 50% per year, which roughly fits data from 1983 to 2019 [3]. Thus, the users’ bandwidth can be expected to reach approximately 1 Gb/s by 2022. At the same time, network applications will grow and utilize the bandwidth provided, just like programs and their data expand to fill the memory available in a computer system. Most of the available bandwidth today is consumed by video applications, and the amount of data is further increasing due to already established and emerging applications, e.g., ultrahigh definition, high dynamic range, or virtual, augmented, mixed realities, or immersive media applications in general.}, doi = {10.1109/jproc.2021.3098048}, keywords = {Special issues and sections, Video coding, Video compression, Media, Streaming media, Transform coding, Virtual reality, Mixed reality, Internet}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9519598} } @InProceedings{Tashtarian2021, author = {Farzad Tashtarian and Abdelhak Bentaleb and Reza Farahani and Minh Nguyen and Christian Timmerer and Hellwagner, Hermann and Roger Zimmermann}, booktitle = {2021 IEEE 46th Conference on Local Computer Networks (LCN)}, title = {{A Distributed Delivery Architecture for User Generated Content Live Streaming over HTTP}}, year = {2021}, month = {oct}, pages = {162--169}, publisher = {IEEE}, abstract = {Live User Generated Content (UGC) has become very popular in today’s video streaming applications, in particular with gaming and e-sport.
However, streaming UGC presents unique challenges for video delivery. When dealing with the technical complexity of managing hundreds or thousands of concurrent streams that are geographically distributed, UGC systems are forced to make difficult trade-offs between video quality and latency. To bridge this gap, this paper presents a fully distributed architecture for UGC delivery over the Internet, termed QuaLA (joint Quality-Latency Architecture). The proposed architecture aims to jointly optimize video quality and latency for a better user experience and fairness. By using the proximal Jacobi alternating direction method of multipliers (ProxJ-ADMM) technique, QuaLA proposes a fully distributed mechanism to achieve an appropriate solution. We demonstrate the effectiveness of the proposed architecture through real-world experiments using the CloudLAB testbed. Experimental results show the outperformance of QuaLA in achieving high quality with more than 57% improvement while preserving a good level of fairness and respecting a given target latency among all clients compared to conventional client-driven solutions.}, doi = {10.1109/lcn52139.2021.9525027}, keywords = {UGC streaming, low latency live streaming, fairness, QoE, HAS, DASH, ABR, adaptive streaming, ADMM}, url = {https://ieeexplore.ieee.org/document/9525027} } @InProceedings{Taraghi2021b, author = {Babak Taraghi}, booktitle = {Proceedings of the 29th {ACM} International Conference on Multimedia}, title = {{End-to-end Quality of Experience Evaluation for HTTP Adaptive Streaming}}, year = {2021}, month = {oct}, pages = {2936--2939}, publisher = {ACM}, abstract = {Exponential growth in multimedia streaming traffic over the Internet motivates the research and further investigation of the user's perceived quality of such services. Enhancement of experienced quality by the users becomes more substantial when service providers compete on establishing superiority by gaining more subscribers or customers. Quality of Experience (QoE) enhancement would not be possible without an authentic and accurate assessment of the streaming sessions. HTTP Adaptive Streaming (HAS) is today's prevailing technique to deliver the highest possible audio and video content quality to the users. An end-to-end evaluation of QoE in HAS covers the precise measurement of the metrics that affect the perceived quality, e.g., startup delay, stall events, and delivered media quality. Improvements of the mentioned metrics could limit the service's scalability, which is an important factor in real-world scenarios. In this study, we will investigate the stated metrics, best practices and evaluation methods, and available techniques with an aim to (i) design and develop practical and scalable measurement tools and prototypes, (ii) provide a better understanding of current technologies and techniques (e.g.,
Adaptive Bitrate algorithms), (iii) conduct in-depth research on the significant metrics in a way that improvements of QoE with scalability in mind would be feasible, and finally (iv) provide a comprehensive QoE model which outperforms state-of-the-art models.}, doi = {10.1145/3474085.3481025}, keywords = {HTTP Adaptive Streaming, Quality of Experience, Subjective Evaluation, Objective Evaluation, Adaptive Bitrate, QoE model}, url = {https://dl.acm.org/doi/10.1145/3474085.3481025} } @Article{Taraghi2021a, author = {Babak Taraghi and Minh Nguyen and Hadi Amirpour and Christian Timmerer}, journal = {IEEE Access}, title = {{Intense: In-Depth Studies on Stall Events and Quality Switches and Their Impact on the Quality of Experience in {HTTP} Adaptive Streaming}}, year = {2021}, issn = {2169-3536}, month = aug, pages = {118087--118098}, volume = {9}, abstract = {With the recent growth of multimedia traffic over the Internet and emerging multimedia streaming service providers, improving Quality of Experience (QoE) for HTTP Adaptive Streaming (HAS) becomes more important. Alongside other factors, such as the media quality, HAS relies on the performance of the media player’s Adaptive Bitrate (ABR) algorithm to optimize QoE in multimedia streaming sessions. QoE in HAS suffers from weak or unstable internet connections and suboptimal ABR decisions. As a result of imperfect adaptiveness to the characteristics and conditions of the internet connection, stall events and quality level switches of different durations could occur, which negatively affect the QoE. In this paper, we address various identified open issues related to the QoE for HAS, notably (i) the minimum noticeable duration for stall events in HAS; (ii) the correlation between the media quality and the impact of stall events on QoE; (iii) the end-user preference regarding multiple shorter stall events versus a single longer stall event; and (iv) the end-user preference of media quality switches over stall events. Therefore, we have studied these open issues from both objective and subjective evaluation perspectives and presented the correlation between the two types of evaluations. The findings documented in this paper can be used as a baseline for improving ABR algorithms and policies in HAS.}, doi = {10.1109/access.2021.3107619}, keywords = {Crowdsourcing, HTTP adaptive streaming, quality of experience, quality switches, stall events, subjective evaluation, objective evaluation}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9521894} } @InProceedings{Taraghi2021, author = {Babak Taraghi and Abdelhak Bentaleb and Christian Timmerer and Roger Zimmermann and Hellwagner, Hermann}, booktitle = {Proceedings of the 31st ACM Workshop on Network and Operating Systems Support for Digital Audio and Video}, title = {{Understanding quality of experience of heuristic-based HTTP adaptive bitrate algorithms}}, year = {2021}, month = {jul}, pages = {82--89}, publisher = {ACM}, abstract = {Adaptive bitrate (ABR) algorithms play a crucial role in delivering the highest possible viewer's Quality of Experience (QoE) in HTTP Adaptive Streaming (HAS). Online video streaming service providers use HAS - the dominant video streaming technique on the Internet - to deliver the best QoE for their users. A viewer's delight relies heavily on how the ABR of a media player can adapt the stream's quality to the current network conditions.
QoE for video streaming sessions has been assessed in many research projects to give better insight into the significant quality metrics such as startup delay and stall events. The ITU Telecommunication Standardization Sector (ITU-T) P.1203 quality evaluation model allows algorithmic prediction of a subjective Mean Opinion Score (MOS) by considering various quality metrics. Subjective evaluation is the best assessment method for examining the end-user opinion over a video streaming session's experienced quality. We have conducted subjective evaluations with crowdsourced participants and evaluated the MOS of the sessions using the ITU-T P.1203 quality model. This paper's main contribution is to investigate the correspondence of subjective and objective evaluations for well-known heuristic-based ABRs.}, doi = {10.1145/3458306.3458875}, keywords = {HTTP Adaptive Streaming, ABR Algorithms, Quality of Experience, Crowdsourcing, Subjective Evaluation, Objective Evaluation, MOS}, url = {https://dl.acm.org/doi/10.1145/3458306.3458875} } @InProceedings{Steinkellner2021, author = {Philip Steinkellner and Klaus Schöffmann}, booktitle = {2021 International Conference on Content-Based Multimedia Indexing (CBMI)}, title = {{Evaluation of Object Detection Systems and Video Tracking in Skiing Videos}}, year = {2021}, month = {jun}, pages = {1--6}, publisher = {IEEE}, abstract = {Nowadays, modern ski resorts provide additional services to customers, such as recording videos of specific moments from their skiing experience. This and similar tasks can be achieved by using computer vision methods. In this work, we evaluate the detection performance of current object detection methods and the tracking performance of a detection-based tracking algorithm. The evaluation is based on videos of skiers and snowboarders from ski resorts. We collect videos of race tracks from different resorts and compile a public dataset of images and videos, where skiers and snowboarders are annotated with bounding boxes. Based on this data, we evaluate the performance of four state-of-the-art object detection methods. This evaluation is performed with general models trained on the MS COCO dataset as well as with custom models trained on our dataset. In addition, we review the performance of the detection-based, multi-object tracking algorithm Deep SORT, which we adapt for skier tracking. The results show promising performance and reveal that the MS COCO models already achieve high Precision, while training a custom model additionally improves the performance. Bigger models profit from custom training in terms of more accurate bounding box placement and higher Precision, while smaller models have an overall high training payoff. The modified Deep SORT tracker manages to follow a skier’s trajectory over an extended period and operates with high accuracy, which indicates that the tracker is overall well suited for tracking of skiers and snowboarders on race tracks.
Even when exposed to strong camera and skier movement changes, the tracker stays latched onto the target.}, doi = {10.1109/cbmi50038.2021.9461905}, keywords = {Object Detection, Object Tracking, YOLOv4, Faster R-CNN, Deep SORT, Skiing, Sports Video Analysis}, url = {http://dx.doi.org/10.1109/cbmi50038.2021.9461905} } @Article{Sokolova2021, author = {Natalia Sokolova and Klaus Schoeffmann and Mario Taschwer and Stephanie Sarny and Doris Putzgruber-Adamitsch and Yosuf El-Shabrawi}, journal = {PLOS ONE}, title = {{Automatic detection of pupil reactions in cataract surgery videos}}, year = {2021}, issn = {1932-6203}, month = {oct}, number = {10}, pages = {e0258390}, volume = {16}, abstract = {In the light of an increased use of premium intraocular lenses (IOL), such as EDOF IOLs, multifocal IOLs or toric IOLs, even minor intraoperative complications such as decentrations or an IOL tilt will hamper the visual performance of these IOLs. Thus, the post-operative analysis of cataract surgeries to detect even minor intraoperative deviations that might explain a lack of post-operative success becomes more and more important. Up to now, surgical videos have been evaluated by just looking at a very limited number of intraoperative data sets or, as done in studies evaluating the pupil changes that occur during surgeries, at a small number of intraoperative pictures only. A continuous measurement of pupil changes over the whole surgery, which would yield clinically more relevant data, has not yet been described. Therefore, the automatic retrieval of such events may be a great support for a post-operative analysis. This would be especially true if large data files could be evaluated automatically. In this work, we automatically detect pupil reactions in cataract surgery videos. We employ a Mask R-CNN architecture as a segmentation algorithm to segment the pupil and iris with pixel-based accuracy and then track their sizes across the entire video. We can detect pupil reactions with a harmonic mean (H) of Recall, Precision, and Ground Truth Coverage Rate (GTCR) of 60.9% and an average prediction length (PL) of 18.93 seconds. However, we consider the best configuration for practical use the one with the H value of 59.4% and PL of 10.2 seconds, which is much shorter. We further investigate the generalization ability of this method on a slightly different dataset without retraining the model. In this evaluation, we achieve the H value of 49.3% with the PL of 18.15 seconds.}, doi = {10.1371/journal.pone.0258390}, editor = {Andreas Wedrich}, publisher = {Public Library of Science (PLoS)}, url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0258390} } @InCollection{Shams2021, author = {Nakisa Shams and Hadi Amirpour and Christian Timmerer and Mohammad Ghanbari}, booktitle = {Proceedings of Sixth International Congress on Information and Communication Technology}, publisher = {Springer Singapore}, title = {{A Channel Allocation Algorithm for Cognitive Radio Users Based on Channel State Predictors}}, year = {2021}, month = {sep}, pages = {711--719}, volume = {235}, abstract = {Cognitive radio networks can efficiently manage the radio spectrum by utilizing the spectrum holes for secondary users in licensed frequency bands. The energy that is used to detect spectrum holes can be reduced considerably by predicting them. However, collisions can occur either between a primary user and secondary users or among the secondary users themselves.
This paper introduces a centralized channel allocation algorithm (CCAA) in a scenario with multiple secondary users to control primary and secondary collisions. The proposed allocation algorithm, which uses a channel state predictor (CSP), provides good performance with fairness among the secondary users while they have minimal interference with the primary user. The simulation results show that the probability of a wrong prediction of an idle channel state in a multi-channel system is less than 0.9%. The channel state prediction saves the sensing energy by 73%, and the utilization of the spectrum can be improved by more than 77%.}, doi = {10.1007/978-981-16-2380-6_62}, keywords = {Cognitive radio, Neural networks, Prediction, Idle channel}, url = {https://link.springer.com/chapter/10.1007/978-981-16-2380-6_62} } @InProceedings{Schoeffmann2021, author = {Klaus Schoeffmann and Jakub Lokoc and Werner Bailer}, booktitle = {Proceedings of the 2nd ACM International Conference on Multimedia in Asia}, title = {{10 years of video browser showdown}}, year = {2021}, month = {mar}, pages = {1--3}, publisher = {ACM}, abstract = {The Video Browser Showdown (VBS) has influenced the Multimedia community already for 10 years now. More than 30 unique teams from over 21 countries participated in the VBS since 2012 already. In 2021, we are celebrating the 10th anniversary of VBS, where 17 international teams compete against each other in an unprecedented contest of fast and accurate multimedia retrieval. In this tutorial we discuss the motivation and details of the VBS contest, including its history, rules, evaluation metrics, and achievements for multimedia retrieval. We talk about the properties of specific VBS retrieval systems and their unique characteristics, as well as existing open-source tools that can be used as a starting point for participating for the first time. Participants of this tutorial get a detailed understanding of the VBS and its search systems, and see the latest developments of interactive video retrieval.}, doi = {10.1145/3444685.3450215}, url = {https://dl.acm.org/doi/10.1145/3444685.3450215} } @Article{Saurabh2021, author = {Nishant Saurabh and Carlos Rubia and Anandakumar Palanisamy and Spiros Koulouzis and Mirsat Sefidanoski and Antorweep Chakravorty and Zhiming Zhao and Aleksandar Karadimce and Radu Prodan}, journal = {Blockchain: Research and Applications}, title = {{The ARTICONF Approach to Decentralized Car-Sharing}}, year = {2021}, issn = {2096-7209}, month = {may}, pages = {1--37}, abstract = {Social media applications are essential for next generation connectivity. Today, social media are centralized platforms with a single proprietary organization controlling the network and posing critical trust and governance issues over the created and propagated content. The ARTICONF project [1] funded by the European Union’s Horizon 2020 program researches a decentralized social media platform based on a novel set of trustworthy, resilient and globally sustainable tools that address privacy, robustness and autonomy-related promises that proprietary social media platforms have failed to deliver so far. This paper presents the ARTICONF approach to a car-sharing decentralized application (DApp) use case, as a new collaborative peer-to-peer model providing an alternative solution to private car ownership. 
We describe a prototype implementation of the car-sharing social media DApp and illustrate through real snapshots how the different ARTICONF tools support it in a simulated scenario.}, doi = {10.1016/j.bcra.2021.100013}, keywords = {Social media, car-sharing, decentralization, blockchain}, publisher = {Elsevier BV}, url = {https://www.sciencedirect.com/science/article/pii/S2096720921000087?via=ihub} } @Article{Rossetto2021, author = {Luca Rossetto and Ralph Gasser and Jakub Lokoc and Werner Bailer and Klaus Schoeffmann and Bernd Muenzer and Tomas Soucek and Phuong Anh Nguyen and Paolo Bolettieri and Andreas Leibetseder and Stefanos Vrochidis}, journal = {IEEE Transactions on Multimedia}, title = {{Interactive Video Retrieval in the Age of Deep Learning - Detailed Evaluation of VBS 2019}}, year = {2021}, issn = {1941-0077}, month = mar, pages = {243--256}, volume = {23}, abstract = {Despite the fact that automatic content analysis has made remarkable progress over the last decade - mainly due to significant advances in machine learning - interactive video retrieval is still a very challenging problem, with an increasing relevance in practical applications. The Video Browser Showdown (VBS) is an annual evaluation competition that pushes the limits of interactive video retrieval with state-of-the-art tools, tasks, data, and evaluation metrics. In this paper, we analyse the results and outcome of the 8th iteration of the VBS in detail. We first give an overview of the novel and considerably larger V3C1 dataset and the tasks that were performed during VBS 2019. We then go on to describe the search systems of the six international teams in terms of features and performance. And finally, we perform an in-depth analysis of the per-team success ratio and relate this to the search strategies that were applied, the most popular features, and problems that were experienced. A large part of this analysis was conducted based on logs that were collected during the competition itself. This analysis gives further insights into the typical search behavior and differences between expert and novice users. Our evaluation shows that textual search and content browsing are the most important aspects in terms of logged user interactions. Furthermore, we observe a trend towards deep learning based features, especially in the form of labels generated by artificial neural networks. But nevertheless, for some tasks, very specific content-based search features are still being used. We expect these findings to contribute to future improvements of interactive video search systems.}, doi = {10.1109/tmm.2020.2980944}, keywords = {Interactive Video Retrieval, Video Browsing, Video Content Analysis, Content-based Retrieval, Evaluations}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9037125} } @Article{Ross2021, author = {Tobias Ross and Annika Reinke and Peter M. Full and Martin Wagner and Hannes Kenngott and Martin Apitz and Hellena Hempe and Diana Mindroc-Filimon and Patrick Scholz and Thuy Nuong Tran and Pierangela Bruno and Pablo Arbeláez and Gui-Bin Bian and Sebastian Bodenstedt and Jon Lindström Bolmgren and Laura Bravo-Sánchez and Hua-Bin Chen and Cristina González and Dong Guo and Paal Halvorsen and Pheng-Ann Heng and Enes Hosgor and Zeng-Guang Hou and Fabian Isensee and Debesh Jha and Tingting Jiang and Yueming Jin and Kadir Kirtac and Sabrina Kletz and Stefan Leger and Zhixuan Li and Klaus H. Maier-Hein and Zhen-Liang Ni and Michael A. 
Riegler and Klaus Schoeffmann and Ruohua Shi and Stefanie Speidel and Michael Stenzel and Isabell Twick and Gutai Wang and Jiacheng Wang and Liansheng Wang and Lu Wang and Yujie Zhang and Yan-Jie Zhou and Lei Zhu and Manuel Wiesenfarth and Annette Kopp-Schneider and Beat P. Müller-Stich and Lena Maier-Hein}, journal = {Medical Image Analysis}, title = {{Comparative validation of multi-instance instrument segmentation in endoscopy: Results of the ROBUST-MIS 2019 challenge}}, year = {2021}, issn = {1361-8415}, month = {may}, number = {66}, pages = {1--62}, volume = {70}, abstract = {Intraoperative tracking of laparoscopic instruments is often a prerequisite for computer and robotic-assisted interventions. While numerous methods for detecting, segmenting and tracking of medical instruments based on endoscopic video images have been proposed in the literature, key limitations remain to be addressed: Firstly, robustness, that is, the reliable performance of state-of-the-art methods when run on challenging images (e.g. in the presence of blood, smoke or motion artifacts). Secondly, generalization; algorithms trained for a specific intervention in a specific hospital should generalize to other interventions or institutions. In an effort to promote solutions for these limitations, we organized the Robust Medical Instrument Segmentation (ROBUST-MIS) challenge as an international benchmarking competition with a specific focus on the robustness and generalization capabilities of algorithms. For the first time in the field of endoscopic image processing, our challenge included a task on binary segmentation and also addressed multi-instance detection and segmentation. The challenge was based on a surgical data set comprising 10,040 annotated images acquired from a total of 30 surgical procedures from three different types of surgery. The validation of the competing methods for the three tasks (binary segmentation, multi-instance detection and multi-instance segmentation) was performed in three different stages with an increasing domain gap between the training and the test data. The results confirm the initial hypothesis, namely that algorithm performance degrades with an increasing domain gap. While the average detection and segmentation quality of the best-performing algorithms is high, future research should concentrate on detection and segmentation of small, crossing, moving and transparent instrument(s) (parts).}, doi = {10.1016/j.media.2020.101920}, keywords = {Multi-instance instrument, minimally invasive surgery, robustness and generalization, surgical data science}, publisher = {Elsevier BV}, url = {https://www.sciencedirect.com/science/article/pii/S136184152030284X} } @InProceedings{Roman2021, author = {Dumitru Roman and Nikolay Nikolov and Ahmet Soylu and Brian Elvesaeter and Hui Song and Radu Prodan and Dragi Kimovski and Andrea Marrella and Francesco Leotta and Mihhail Matskin and Giannis Ledakis and Konstantinos Theodosiou and Anthony Simonet-Boulogne and Fernando Perales and Evgeny Kharlamov and Alexandre Ulisses and Arnor Solberg and Raffaele Ceccarelli}, booktitle = {2021 IEEE Symposium on Computers and Communications (ISCC)}, title = {{Big Data Pipelines on the Computing Continuum: Ecosystem and Use Cases Overview}}, year = {2021}, month = {sep}, pages = {1--4}, publisher = {IEEE}, abstract = {Organisations possess and continuously generate huge amounts of static and stream data, especially with the proliferation of Internet of Things technologies. 
Collected but unused data, i.e., Dark Data, mean loss in value creation potential. In this respect, the concept of Computing Continuum extends the traditional more centralised Cloud Computing paradigm with Fog and Edge Computing in order to ensure low latency pre-processing and filtering close to the data sources. However, there are still major challenges to be addressed, in particular related to management of various phases of Big Data processing on the Computing Continuum. In this paper, we set forth an ecosystem for Big Data pipelines in the Computing Continuum and introduce five relevant real-life example use cases in the context of the proposed ecosystem.}, doi = {10.1109/iscc53001.2021.9631410}, keywords = {Big Data, Computing Continuum, Dark Data, Data Pipelines, Cloud-Fog-Edge Computing}, url = {https://ieeexplore.ieee.org/document/9631410} } @Article{Ristov2021, author = {Sasko Ristov and Thomas Fahringer and Radu Prodan and Magdalena Kostoska and Marjan Gusev and Schahram Dustdar}, journal = {IEEE Internet Computing}, title = {{Inter-host Orchestration Platform Architecture for Ultra-scale Cloud Applications}}, year = {2021}, issn = {1941-0131}, pages = {1--1}, abstract = {Cloud data centers exploit many memory page management techniques that reduce the total memory utilization and access time. Mainly, these techniques are applied to a hypervisor in a single host (intra-hypervisor) without the possibility to exploit the knowledge obtained by a group of hosts (clusters). We introduce a novel inter-hypervisor orchestration platform to provide intelligent memory page management for horizontal scaling. It will use the performance behavior of faster virtual machines to activate pre-fetching mechanisms that reduce the number of page faults. The overall platform consists of five modules - profiler, collector, classifier, predictor, and pre-fetcher. We developed and deployed a prototype of the platform, which comprises the first three modules. The evaluation shows that data collection is feasible in real-time, which means that if our approach is used on top of the existing memory page management techniques, it can significantly lower the miss rate that initiates page faults.}, doi = {10.1109/mic.2020.3034293}, keywords = {Cloud infrastructure, memory access management, page faults, pre-fetching, virtualization, XEN}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9245504} } @Article{Rinner2021, author = {Bernhard Rinner and Christian Bettstetter and Hellwagner, Hermann and Stephan Weiss}, journal = {Computer}, title = {{Multidrone Systems: More Than the Sum of the Parts}}, year = {2021}, issn = {0018-9162}, month = {may}, number = {5}, pages = {34--43}, volume = {54}, abstract = {Now that drones have evolved from bulky platforms to agile devices, a challenge is to combine multiple drones into an integrated autonomous system, offering functionality that individual drones cannot achieve. Such multidrone systems require connectivity, communication, and coordination.
We discuss these building blocks along with case studies and lessons learned.}, doi = {10.1109/mc.2021.3058441}, keywords = {Autonomous systems, Drones}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9427128} } @InCollection{Ressmann2021, author = {Anja Ressmann and Klaus Schoeffmann}, booktitle = {MultiMedia Modeling}, publisher = {Springer International Publishing}, title = {{IVOS - The ITEC Interactive Video Object Search System at VBS 2021}}, year = {2021}, month = jan, number = {12573}, pages = {479--483}, abstract = {We present IVOS, an interactive video content search system that allows for object-based search and filtering in video archives. The main idea behind it is to use the result of recent object detection models to index all keyframes with a manageable set of object classes, and allow the user to filter by different characteristics, such as object name, object location, relative object size, object color, and combinations for different object classes – e.g., “large person in white on the left, with a red tie”. In addition to that, IVOS can also find segments with a specific number of objects of a particular class (e.g., “many apples” or “two people”) and supports similarity search, based on similar object occurrences.}, doi = {10.1007/978-3-030-67835-7_48}, keywords = {Content-based video retrieval, Interactive video search, Object detection, Deep learning}, url = {https://link.springer.com/chapter/10.1007/978-3-030-67835-7_48} } @InCollection{Prodan2021, author = {Shajulin Benedict and Prateek Agrawal and Radu Prodan}, booktitle = {Communications in Computer and Information Science}, publisher = {Springer Singapore}, title = {{Energy Consumption Analysis of R-Based Machine Learning Algorithms for Pandemic Predictions}}, year = {2021}, month = jun, pages = {192--204}, volume = {1393}, abstract = {The push for agile pandemic analytic solutions has attained development-stage software modules of applications instead of functioning as full-fledged production-stage applications – i.e., performance, scalability, and energy-related concerns are not optimized for the underlying computing domains. And while the research continues to support the idea that reducing the energy consumption of algorithms improves the lifetime of battery-operated machines, and such tools are advisable in almost any developer setting, an energy analysis report for R-based analytic programs is indeed a valuable suggestion. This article proposes an energy analysis framework for R-programs that enables data analytic developers, including pandemic-related application developers, to analyze the programs. It reveals an energy analysis report for R programs written to predict the new cases of 215 countries using random forest variants. Experiments were carried out at the IoT cloud research lab and the energy efficiency aspects were discussed in the article.
In the experiments, the ranger-based prediction program consumed 95.8 J.}, doi = {10.1007/978-981-16-3660-8_18}, keywords = {Analysis, Energy consumption, Machine learning, R-program, Tools}, url = {https://link.springer.com/chapter/10.1007/978-981-16-3660-8_18} } @InProceedings{Pasandi2021a, author = {Hannaneh Barahouei Pasandi and Tamer Nadeem and Hadi Amirpour and Christian Timmerer}, booktitle = {Proceedings of the 27th Annual International Conference on Mobile Computing and Networking}, title = {{A cross-layer approach for supporting real-time multi-user video streaming over WLANs*}}, year = {2021}, month = {oct}, pages = {849--851}, publisher = {ACM}, abstract = {MU-MIMO is a high-speed technique in IEEE 802.11ac and upcoming 802.11ax technologies that improves spectral efficiency by allowing concurrent communication between one Access Point and multiple users. In this paper, we present MuVIS, a novel framework that proposes MU-MIMO-aware optimization for multi-user multimedia applications over IEEE 802.11ac/ax. Taking a cross-layer approach, MuVIS first optimizes the MU-MIMO user group selection for the users with the same characteristics in the PHY/MAC layer. It then optimizes the video bitrate for each group accordingly. We present our design and its evaluation on smartphones and laptops over 802.11ac WiFi.}, doi = {10.1145/3447993.3482868}, url = {https://dl.acm.org/doi/abs/10.1145/3447993.3482868} } @InProceedings{Pasandi2021, author = {Hannaneh Barahouei Pasandi and Hadi Amirpour and Tamer Nadeem and Christian Timmerer}, booktitle = {Proceedings of the Workshop on Design, Deployment, and Evaluation of Network-assisted Video Streaming}, title = {{Learning-driven MU-MIMO Grouping for Multi-User Multimedia Applications Over Commodity WiFi}}, year = {2021}, month = {dec}, pages = {15--21}, publisher = {ACM}, abstract = {MU-MIMO is a high-speed technique in IEEE 802.11ac and upcoming ax technologies that improves spectral efficiency by allowing concurrent communication between one Access Point and multiple users. In this paper, we present LATTE, a novel framework that proposes MU-MIMO-aware optimization for multi-user multimedia applications over IEEE 802.11ac/ax. Taking a cross-layer approach, LATTE first optimizes the MU-MIMO user group selection for the users with the same characteristics in the PHY/MAC layer. It then optimizes the video bitrate for each group accordingly. We present our design and its evaluation on smartphones and laptops over 802.11ac WiFi. Our experimental evaluations indicate that LATTE can outperform other video rate adaptation algorithms.}, doi = {10.1145/3488662.3493828}, url = {https://dl.acm.org/doi/10.1145/3488662.3493828} } @InProceedings{Nguyen2021, author = {Minh Nguyen}, booktitle = {Proceedings of the 12th ACM Multimedia Systems Conference}, title = {{Policy-driven Dynamic HTTP Adaptive Streaming Player Environment}}, year = {2021}, month = {jun}, pages = {408--412}, publisher = {ACM}, abstract = {Video streaming services account for the majority of today's traffic on the Internet. Although the data transmission rate has been increasing significantly, the growing number and variety of media and higher quality expectations of users have led networked media applications to fully or even over-utilize the available throughput. HTTP Adaptive Streaming (HAS) has become a predominant technique for multimedia delivery over the Internet today.
However, there are critical challenges for multimedia systems, especially the tradeoff between the increasing content (complexity) and various requirements regarding time (latency) and quality (QoE). This thesis will cover the main aspects within the end user's environment, including video consumption and interactivity, collectively referred to as player environment, which is probably the most crucial component in today's multimedia applications and services. We will investigate the methods that can enable the specification of various policies reflecting the user's needs in given use cases. Besides, we will also work on schemes that allow efficient support for server-assisted and network-assisted HAS systems. Finally, those approaches will be considered for combination into policies that fit the requirements of all use cases (e.g., live streaming, video on demand, etc.).}, doi = {10.1145/3458305.3478466}, url = {https://dl.acm.org/doi/10.1145/3458305.3478466} } @InProceedings{Najafabadi2021, author = {Zahra Najafabadi Samani and Nishant Saurabh and Radu Prodan}, booktitle = {2021 IEEE 5th International Conference on Fog and Edge Computing (ICFEC)}, title = {{Multilayer Resource-aware Partitioning for Fog Application Placement}}, year = {2021}, month = {may}, pages = {9--18}, publisher = {IEEE}, abstract = {Fog computing emerged as a crucial platform for the deployment of IoT applications. The complexity of such applications requires methods that handle the resource diversity and network structure of Fog devices, while maximizing the service placement and reducing the resource wastage. Prior studies in this domain primarily focused on optimizing application-specific requirements and fail to address the network topology combined with the different types of resources encountered in Fog devices. To overcome these problems, we propose a multilayer resource-aware partitioning method to minimize the resource wastage and maximize the service placement and deadline satisfaction rates in a Fog infrastructure with high multi-user application placement requests. Our method represents the heterogeneous Fog resources as a multilayered network graph and partitions them based on network topology and resource features. Afterwards, it identifies the appropriate device partitions for placing an application according to its requirements, which need to overlap in the same network topology partition. Simulation results show that our multilayer resource-aware partitioning method is able to place twice as many services, satisfy deadlines for three times as many application requests, and reduce the resource wastage by up to 15–32 times compared to two availability-aware and resource-aware state-of-the-art methods.}, doi = {10.1109/icfec51620.2021.00010}, keywords = {Fog computing, application placement, resource partitioning, resource wastage, deadline satisfaction}, url = {https://ieeexplore.ieee.org/document/9458908} } @Article{Moll2021, author = {Philipp Moll and Selina Isak and Hellwagner, Hermann and Jeff Burke}, journal = {Computer Networks}, title = {{A Quadtree-based synchronization protocol for inter-server game state synchronization}}, year = {2021}, issn = {1389-1286}, month = {feb}, pages = {107723}, volume = {185}, abstract = {Online games are a fundamental part of the entertainment industry but the current IP infrastructure does not satisfactorily fulfill the needs of these services.
The novel networking architecture Named Data Networking (NDN) inherently supports network-level multicast and packet-level security and thereby introduces promising features for online games. In this paper, we propose an NDN-based approach to synchronize game state in a server cluster, a task necessary to allow multiple players in large numbers to play in the same game world. The proposed Quadtree Synchronization Protocol applies NDN’s data-centric nature to decouple the game world from the game servers hosting it. This means that requesting changes of a specific game world region becomes possible without knowing which game server is responsible for the requested region. We use a hierarchic game world structure when requesting data that allows the network to forward requests to the responsible game server without directly addressing it. This region-based naming scheme decouples world regions from servers which eases the management of the game server cluster and allows easier recovery after server failures. In addition, this decoupling allows exchanging information about a geographical region, such as a game world, without knowledge of the other participants changing the world. Such a region-based synchronization mode is not possible to implement with existing protocols. However, it allows building distributed systems that do not require a central server to work. Besides architectural benefits, network emulations show that our protocol increases the efficiency of data transport by utilizing network-level multicast. Our proposed approach can keep up with current protocols which can be used for inter-server game state synchronization.}, doi = {10.1016/j.comnet.2020.107723}, keywords = {Named Data Networking, Distributed dataset synchronization, Online games}, publisher = {Elsevier {BV}}, url = {https://www.sciencedirect.com/science/article/pii/S1389128620313177} } @InProceedings{Menon2021a, author = {Vignesh V Menon and Hadi Amirpour and Mohammad Ghanbari and Christian Timmerer}, booktitle = {2021 IEEE International Conference on Image Processing (ICIP)}, title = {{Efficient Content-Adaptive Feature-Based Shot Detection for HTTP Adaptive Streaming}}, year = {2021}, month = {sep}, pages = {2174--2178}, publisher = {IEEE}, abstract = {Video delivery over the Internet has been becoming a commodity in recent years, owing to the widespread use of Dynamic Adaptive Streaming over HTTP (DASH). The DASH specification defines a hierarchical data model for Media Presentation Descriptions (MPDs) in terms of segments. This paper focuses on segmenting video into multiple shots for encoding in Video on Demand (VoD) HTTP Adaptive Streaming (HAS) applications. Therefore, we propose a novel Discrete Cosine Transform (DCT) feature-based shot detection and successive elimination algorithm for shot detection and compare it against the default shot detection algorithm of the x265 implementation of the High Efficiency Video Coding (HEVC) standard. 
Our experimental results demonstrate that our proposed feature-based pre-processor has a recall rate 25% and an F-measure 20% greater than the benchmark algorithm for shot detection.}, doi = {10.1109/icip42928.2021.9506092}, keywords = {HTTP Adaptive Streaming, Video-on-Demand, Shot detection, multi-shot encoding}, url = {https://ieeexplore.ieee.org/document/9506092} } @InProceedings{Menon2021, author = {Vignesh Menon and Hadi Amirpourazarian and Christian Timmerer and Mohammad Ghanbari}, booktitle = {2021 Picture Coding Symposium (PCS)}, title = {{Efficient Multi-Encoding Algorithms for HTTP Adaptive Bitrate Streaming}}, year = {2021}, month = jun, pages = {1--5}, publisher = {IEEE}, abstract = {Since video accounts for the majority of today’s internet traffic, the popularity of HTTP Adaptive Streaming (HAS) is increasing steadily. In HAS, each video is encoded at multiple bitrates and spatial resolutions (i.e., representations) to adapt to a heterogeneity of network conditions, device characteristics, and end-user preferences. Most of the streaming services utilize cloud-based encoding techniques which enable a fully parallel encoding process to speed up the encoding and consequently to reduce the overall time complexity. State-of-the-art approaches further improve the encoding process by utilizing encoder analysis information from already encoded representation(s) to improve the encoding time complexity of the remaining representations. In this paper, we investigate various multi-encoding algorithms (i.e., multi-rate and multi-resolution) and propose novel multi-encoding algorithms for large-scale HTTP Adaptive Streaming deployments. Experimental results demonstrate that the proposed multi-encoding algorithm optimized for the highest compression efficiency reduces the overall encoding time by 39% with a 1.5% bitrate increase compared to stand-alone encodings. Its optimized version for the highest time savings reduces the overall encoding time by 50% with a 2.6% bitrate increase compared to stand-alone encodings.}, doi = {10.1109/pcs50896.2021.9477499}, keywords = {HTTP Adaptive Streaming, HEVC, Multi-rate Encoding, Multi-encoding}, url = {https://ieeexplore.ieee.org/document/9477499} } @InProceedings{Mehran2021, author = {Narges Mehran and Dragi Kimovski and Radu Prodan}, booktitle = {2021 IEEE/ACM 21st International Symposium on Cluster, Cloud and Internet Computing (CCGrid)}, title = {{A Two-Sided Matching Model for Data Stream Processing in the Cloud -- Fog Continuum}}, year = {2021}, month = {may}, pages = {514--524}, publisher = {IEEE}, abstract = {Latency-sensitive and bandwidth-intensive stream processing applications are dominant traffic generators over the Internet network. A stream consists of a continuous sequence of data elements, which require processing in nearly real-time. To improve communication latency and reduce the network congestion, Fog computing complements the Cloud services by moving the computation towards the edge of the network. Unfortunately, the heterogeneity of the new Cloud – Fog continuum raises important challenges related to deploying and executing data stream applications. We explore in this work a two-sided stable matching model called Cloud – Fog to data stream application matching (CODA) for deploying a distributed application represented as a workflow of stream processing microservices on heterogeneous computing continuum resources.
In CODA, the application microservices rank the continuum resources based on their microservice stream processing time, while resources rank the stream processing microservices based on their residual bandwidth. A stable many-to-one matching algorithm assigns microservices to resources based on their mutual preferences, aiming to optimize the complete stream processing time on the application side, and the total streaming traffic on the resource side. We evaluate the CODA algorithm using simulated and real-world Cloud – Fog experimental scenarios. We achieved 11-45% lower stream processing time and 1.3-20% lower streaming traffic compared to related state-of-the-art approaches.}, doi = {10.1109/ccgrid51090.2021.00061}, keywords = {Cloud - fog computing, Distributed databases, Bandwidth, Games, Streaming media, Data models, Real-time systems}, url = {https://ieeexplore.ieee.org/document/9499353} } @InProceedings{Matha2021, author = {Roland Matha and Dragi Kimovski and Anatoliy Zabrovskiy and Christian Timmerer and Radu Prodan}, booktitle = {2021 IEEE 17th International Conference on eScience (eScience)}, title = {{Where to Encode: A Performance Analysis of x86 and Arm-based Amazon EC2 Instances}}, year = {2021}, month = {sep}, pages = {118--127}, publisher = {IEEE}, abstract = {Video streaming became an undivided part of the Internet. To efficiently utilise the limited network bandwidth it is essential to encode the video content. However, encoding is a computationally intensive task, involving high-performance resources provided by private infrastructures or public clouds. Public clouds, such as Amazon EC2, provide a large portfolio of services and instances optimized for specific purposes and budgets. The majority of Amazon’s instances use x86 processors, such as Intel Xeon or AMD EPYC. However, following the recent trends in computer architecture, Amazon introduced Arm based instances that promise up to 40% better cost performance ratio than comparable x86 instances for specific workloads. We evaluate in this paper the video encoding performance of x86 and Arm instances of four instance families using the latest FFmpeg version and two video codecs. We examine the impact of the encoding parameters, such as different presets and bitrates, on the time and cost for encoding. Our experiments reveal that Arm instances show high time and cost saving potential of up to 33.63% for specific bitrates and presets, especially for the x264 codec. However, the x86 instances are more general and achieve low encoding times, regardless of the codec.}, doi = {10.1109/escience51609.2021.00022}, keywords = {Amazon EC2, Arm instances, AVC, Cloud computing, FFmpeg, Graviton2, HEVC, Performance analysis, Video encoding}, url = {https://www.computer.org/csdl/proceedings-article/escience/2021/036100a118/1y14GC0fb6o} } @Article{Madaan2021, author = {Vishu Madaan and Aditya Roy and Charu Gupta and Prateek Agrawal and Anand Sharma and Cristian Bologa and Radu Prodan}, journal = {New Generation Computing}, title = {{XCOVNet: Chest X-ray Image Classification for COVID-19 Early Detection Using Convolutional Neural Networks}}, year = {2021}, issn = {1882-7055}, month = {feb}, pages = {1--15}, abstract = {COVID-19 (also known as SARS-COV-2) pandemic has spread in the entire world. It is a contagious disease that easily spreads from one person in direct contact to another, classified by experts in five categories: asymptomatic, mild, moderate, severe, and critical. 
Already more than 66 million people have been infected worldwide with more than 22 million active patients as of 5 December 2020 and the rate is accelerating. More than 1.5 million patients (approximately 2.5% of total reported cases) across the world lost their lives. In many places, the COVID-19 detection takes place through reverse transcription polymerase chain reaction (RT-PCR) tests which may take longer than 48 h. This is one major reason for its severity and rapid spread. We propose in this paper a two-phase X-ray image classification model called XCOVNet for early COVID-19 detection using convolutional neural networks. XCOVNet detects COVID-19 infections in chest X-ray patient images in two phases. The first phase pre-processes a dataset of 392 chest X-ray images of which half are COVID-19 positive and half are negative. The second phase trains and tunes the neural network model to achieve a 98.44% accuracy in patient classification.}, doi = {10.1007/s00354-021-00121-7}, keywords = {Coronavirus, SARS-COV-2, COVID-19 disease diagnosis, Machine learning, Image classification}, publisher = {Springer Science and Business Media LLC}, url = {https://link.springer.com/article/10.1007/s00354-021-00121-7} } @InProceedings{Lv2021, author = {Zezhong Lv and Qing Xu and Klaus Schoeffmann and Simon Parkinson}, booktitle = {2021 IEEE International Conference on Multimedia and Expo (ICME)}, title = {{A Jensen-Shannon Divergence Driven Metric of Visual Scanning Efficiency Indicates Performance of Virtual Driving}}, year = {2021}, month = {jul}, pages = {1--6}, publisher = {IEEE}, abstract = {Visual scanning plays an important role in sampling visual information from the surrounding environments for a lot of everyday sensorimotor tasks, such as driving. In this paper, we consider the problem of the visual scanning mechanism underpinning sensorimotor tasks in 3D dynamic environments. We exploit the use of eye tracking data as a behaviometric for indicating the visuo-motor behavioral measure in the context of virtual driving. A new metric of visual scanning efficiency (VSE), which is defined as a mathematical divergence between a fixation distribution and a distribution of optical flows induced by fixations, is proposed by making use of a widely-known information theoretic tool, namely the square root of Jensen-Shannon divergence. Psychophysical eye tracking studies, in virtual reality based driving, are conducted to reveal that the new metric of visual scanning efficiency can be employed very well as a proxy evaluation for driving performance. These results suggest that the exploitation of eye tracking data provides an effective behaviometric for sensorimotor activities.}, doi = {10.1109/icme51207.2021.9428109}, keywords = {visual scanning efficiency, eye tracking, Jensen-Shannon divergence (JSD), behaviometric}, url = {https://ieeexplore.ieee.org/document/9428109} } @InCollection{Lux2021, author = {Ines Krajger and Mathias Lux and Erich J. Schwarz}, booktitle = {Educating Engineers for Future Industrial Revolutions}, publisher = {Springer International Publishing}, title = {{Digitalization of an Educational Business Model Game}}, year = {2021}, month = mar, pages = {241--252}, volume = {1329}, abstract = {Entrepreneurship Education is an important field of entrepreneurship research and has become a part of many programs of business and engineering schools. Educational games are a powerful tool to create a motivating learning environment.
With the goal of investigating the digitalization of business games, which are typically played in large groups and face to face, we particularly focus on the use case of the business model game called “inspire! build your business”.}, doi = {10.1007/978-3-030-68201-9_25}, keywords = {Entrepreneurship Education, Digitalization, Gamification} } @InProceedings{Lorenzi2021, author = {Daniele Lorenzi and Minh Nguyen and Farzad Tashtarian and Simone Milani and Hellwagner, Hermann and Christian Timmerer}, booktitle = {Proceedings of the 2021 Workshop on Evolution, Performance and Interoperability of QUIC}, title = {{Days of future past}}, year = {2021}, month = {dec}, pages = {8--14}, publisher = {ACM}, abstract = {HTTP Adaptive Streaming (HAS) has become a predominant technique for delivering videos over the Internet. Due to its adaptive behavior according to changing network conditions, it may result in video quality variations that negatively impact the Quality of Experience (QoE) of the user. In this paper, we propose Days of Future Past, an optimization-based Adaptive Bitrate (ABR) algorithm over HTTP/3. Days of Future Past takes advantage of an optimization model and HTTP/3 features, including (i) stream multiplexing and (ii) request cancellation. We design a Mixed Integer Linear Programming (MILP) model that determines the optimal video qualities of both the next segment to be requested and the segments currently located in the buffer. If better qualities for buffered segments are found, the client will send corresponding HTTP GET requests to retrieve them. Multiple segments (i.e., retransmitted segments) might be downloaded simultaneously to upgrade some buffered but not yet played segments to avoid quality decreases using the stream multiplexing feature of QUIC. HTTP/3's request cancellation will be used in case retransmitted segments arrive at the client after their playout time. The experimental results show that our proposed method is able to improve the QoE by up to 33.9%.}, doi = {10.1145/3488660.3493802}, keywords = {HTTP/3, QUIC, Days of Future Past, HAS, QoE}, url = {https://dl.acm.org/doi/10.1145/3488660.3493802} } @Article{Lokoc2021, author = {Jakub Lokoc and Patrik Vesely and Frantisek Mejzlik and Gregor Kovalcik and Tomas Soucek and Luca Rossetto and Klaus Schoeffmann and Werner Bailer and Cathal Gurrin and Loris Sauter and Jaeyub Song and Stefanos Vrochidis and Jiaxin Wu and Björn Thor Jonsson}, journal = {ACM Transactions on Multimedia Computing, Communications, and Applications}, title = {{Is the Reign of Interactive Search Eternal? Findings from the Video Browser Showdown 2020}}, year = {2021}, issn = {1551-6865}, month = {jul}, number = {3}, pages = {1--26}, volume = {17}, abstract = {Comprehensive and fair performance evaluation of information retrieval systems represents an essential task for the current information age. Whereas Cranfield-based evaluations with benchmark datasets support development of retrieval models, significant evaluation efforts are required also for user-oriented systems that try to boost performance with an interactive search approach. This article presents findings from the 9th Video Browser Showdown, a competition that focuses on a legitimate comparison of interactive search systems designed for challenging known-item search tasks over a large video collection.
During previous installments of the competition, the interactive nature of participating systems was a key feature to satisfy known-item search needs, and this article continues to support this hypothesis. Despite the fact that top-performing systems integrate the most recent deep learning models into their retrieval process, interactive searching remains a necessary component of successful strategies for known-item search tasks. Alongside the description of competition settings, evaluated tasks, participating teams, and overall results, this article presents a detailed analysis of query logs collected by the top three performing systems, SOMHunter, VIRET, and vitrivr. The analysis provides a quantitative insight to the observed performance of the systems and constitutes a new baseline methodology for future events. The results reveal that the top two systems mostly relied on temporal queries before a correct frame was identified. An interaction log analysis complements the result log findings and points to the importance of result set and video browsing approaches. Finally, various outlooks are discussed in order to improve the Video Browser Showdown challenge in the future.}, doi = {10.1145/3445031}, keywords = {Interactive video retrieval, deep learning, interactive search evaluation}, publisher = {Association for Computing Machinery (ACM)}, url = {https://dl.acm.org/doi/10.1145/3445031} } @InProceedings{Leibetseder2021b, author = {Andreas Leibetseder and Klaus Schoeffmann}, booktitle = {Proceedings of the 4th Annual on Lifelog Search Challenge}, title = {{lifeXplore at the Lifelog Search Challenge 2021}}, year = {2021}, month = {aug}, pages = {23--28}, publisher = {ACM}, abstract = {Since its first iteration in 2018, the Lifelog Search Challenge (LSC) continues to rise in popularity as an interactive lifelog data retrieval competition, co-located at the ACM International Conference on Multimedia Retrieval (ICMR). The goal of this annual live event is to search a large corpus of lifelogging data for specifically announced memories using a purposefully developed tool within a limited amount of time. As long-standing participants, we present our improved lifeXplore -- a retrieval system combining chronologic day summary browsing with interactive combinable concept filtering. Compared to previous versions, the tool is improved by incorporating temporal queries, advanced day summary features as well as usability improvements.}, doi = {10.1145/3463948.3469060}, keywords = {lifelogging, evaluation campaign, interactive image retrieval, image search}, url = {https://dl.acm.org/doi/10.1145/3463948.3469060} } @InCollection{Leibetseder2021a, author = {Andreas Leibetseder and Klaus Schoeffmann}, booktitle = {MultiMedia Modeling}, publisher = {Springer International Publishing}, title = {{Less is More - diveXplore 5.0 at VBS 2021}}, year = {2021}, month = jan, number = {12573}, pages = {455--460}, abstract = {As a longstanding participating system in the annual Video Browser Showdown (VBS2017-VBS2020) as well as in two iterations of the more recently established Lifelog Search Challenge (LSC2018-LSC2019), diveXplore is developed as a feature-rich Deep Interactive Video Exploration system. After its initial successful employment as a competitive tool at the challenges, its performance, however, declined as new features were introduced increasing its overall complexity. 
We mainly attribute this to the fact that many additions to the system needed to revolve around the system’s core element – an interactive self-organizing browseable featuremap, which, as an integral component did not accommodate the addition of new features well. Therefore, counteracting said performance decline, the VBS 2021 version constitutes a completely rebuilt version 5.0, implemented from scratch with the aim of greatly reducing the system’s complexity as well as keeping proven useful features in a modular manner.}, doi = {10.1007/978-3-030-67835-7_44}, keywords = {Video retrieval, Interactive video search, Video analysis}, url = {https://link.springer.com/chapter/10.1007/978-3-030-67835-7_44} } @InProceedings{Leibetseder2021, author = {Andreas Leibetseder and Klaus Schoeffmann and Joerg Keckstein and Simon Keckstein}, booktitle = {2021 International Conference on Content-Based Multimedia Indexing (CBMI)}, title = {{Post-surgical Endometriosis Segmentation in Laparoscopic Videos}}, year = {2021}, month = {jun}, pages = {1--4}, publisher = {IEEE}, abstract = {Endometriosis is a common women's condition exhibiting a manifold visual appearance in various body-internal locations. Having such properties makes its identification very difficult and error-prone, at least for laymen and non-specialized medical practitioners. In an attempt to provide assistance to gynecologic physicians treating endometriosis, this demo paper describes a system that is trained to segment one frequently occurring visual appearance of endometriosis, namely dark endometrial implants. The system is capable of analyzing laparoscopic surgery videos, annotating identified implant regions with multi-colored overlays and displaying a detection summary for improved video browsing.}, doi = {10.1109/cbmi50038.2021.9461900}, keywords = {Endometriosis, Lesion Segmentation, Mask R-CNN}, url = {http://dx.doi.org/10.1109/cbmi50038.2021.9461900} } @Article{Kimovski2021c, author = {Dragi Kimovski and Roland Matha and Josef Hammer and Narges Mehran and Hellwagner, Hermann and Radu Prodan}, journal = {IEEE Internet Computing}, title = {{Cloud, Fog, or Edge: Where to Compute?}}, year = {2021}, issn = {1941-0131}, month = {jul}, number = {4}, pages = {30--36}, volume = {25}, abstract = {The computing continuum extends the high-performance cloud data centers with energy-efficient and low-latency devices close to the data sources located at the edge of the network. However, the heterogeneity of the computing continuum raises multiple challenges related to application management. These include where to offload an application – from the cloud to the edge – to meet its computation and communication requirements. 
To support these decisions, we provide in this article a detailed performance and carbon footprint analysis of a selection of use case applications with complementary resource requirements across the computing continuum over a real-life evaluation testbed.}, doi = {10.1109/mic.2021.3050613}, keywords = {Edge computing, Cloud computing, Benchmarking, Carbon footprint}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9321525} } @Article{Kimovski2021b, author = {Dragi Kimovski and Narges Mehran and Christopher Emanuel Kerth and Radu Prodan}, journal = {IEEE Transactions on Services Computing}, title = {{Mobility-Aware IoT Applications Placement in the Cloud Edge Continuum}}, year = {2021}, issn = {2372-0204}, month = jul, pages = {1--14}, abstract = {The Edge computing extension of the Cloud services towards the network boundaries raises important placement challenges for IoT applications running in a heterogeneous environment with limited computing capacities. Unfortunately, existing works only partially address this challenge by optimizing a single or aggregate objective (e.g., response time) and not considering the edge devices' mobility and resource constraints. To address this gap, we propose a novel mobility-aware multi-objective IoT application placement (mMAPO) method in the Cloud -- Edge Continuum that optimizes completion time, energy consumption, and economic cost as conflicting objectives. mMAPO utilizes a Markov model for predictive analysis of the Edge device mobility and constrains the optimization to devices that do not frequently move through the network. We evaluate the quality of the mMAPO placements using simulation and real-world experimentation on two IoT applications. Compared to related work, mMAPO reduces the economic cost by 28% and decreases the completion time by 80% while maintaining a stable energy consumption.}, doi = {10.1109/tsc.2021.3094322}, keywords = {Cloud-Edge Continuum, mobility, application placement, multi-objective optimization, energy consumption, cost}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9473013} } @Article{Kimovski2021a, author = {Dragi Kimovski and Roland Matha and Gabriel Iuhasz and Fabrizio Marozzo and Dana Petcu and Radu Prodan}, journal = {Frontiers in Big Data}, title = {{Autotuning of Exascale Applications With Anomalies Detection}}, year = {2021}, issn = {2624-909X}, month = {nov}, pages = {1--14}, volume = {4}, abstract = {The execution of complex distributed applications in exascale systems faces many challenges, as it involves empirical evaluation of countless code variations and application runtime parameters over a heterogeneous set of resources. To mitigate these challenges, the research field of autotuning has gained momentum. The autotuning automates identifying the most desirable application implementation in terms of code variations and runtime parameters. However, the complexity and size of the exascale systems make the autotuning process very difficult, especially considering the number of parameter variations that have to be identified. Therefore, we introduce a novel approach for autotuning exascale applications based on a genetic multi-objective optimization algorithm integrated within the ASPIDE exascale computing framework. The approach considers multi-dimensional search space with support for pluggable objective functions, including execution time and energy requirements. 
Furthermore, the autotuner employs a machine learning-based event detection approach to detect events and anomalies during application execution, such as hardware failures or communication bottlenecks.}, doi = {10.3389/fdata.2021.657218}, keywords = {exascale computing, autotuning, events and anomalies detection, multi-objective optimization, IoT applications}, publisher = {Frontiers Media (SA)}, url = {https://www.frontiersin.org/articles/10.3389/fdata.2021.657218/full} } @Misc{Kimovski2021, author = {Dumitru, Roman and Nikolov, Nikolay and Elvesater, Brian and Soylu, Ahmet and Prodan, Radu and Kimovski, Dragi and Marrella, Andrea and Leotta, Francesco and Benvenuti, Dario and Matskin, Mihhail and Ledakis, Giannis and Simonet-Boulogne, Anthony and Perales, Fernando and Kharlamov, Evgeny and Ulisses, Alexandre and Solberg, Arnor and Ceccarelli, Raffaele}, howpublished = {RCIS '21 Proceedings of the 15th International Conference on Research Challenges in Information Science}, month = may, title = {{DataCloud: Enabling the Big Data Pipelines on the Computing Continuum}}, year = {2021}, abstract = {With the recent developments of the Internet of Things (IoT) and cloud-based technologies, massive amounts of data are generated by heterogeneous sources and stored through dedicated cloud solutions. Often organizations generate much more data than they are able to interpret, and current Cloud Computing technologies cannot fully meet the requirements of the Big Data processing applications and their data transfer overheads. Many data are stored for compliance purposes only but neither used nor turned into value, thus becoming Dark Data, which are not only an untapped value, but also pose a risk for organizations. To guarantee a better exploitation of Dark Data, the DataCloud project aims to realize novel methods and tools for effective and efficient management of the Big Data Pipeline lifecycle encompassing the Computing Continuum. Big Data pipelines are composite pipelines for processing data with nontrivial properties, commonly referred to as the Vs of Big Data (e.g., volume, velocity, value, etc.). Tapping their potential is a key aspect to leverage Dark Data, although it requires going beyond the current approaches and frameworks for Big Data processing. In this respect, the concept of the Computing Continuum extends the traditional centralised Cloud Computing with Edge and Fog computing in order to ensure low-latency pre-processing and filtering close to the data sources. This will prevent overwhelming the centralised cloud data centres, enabling new opportunities for supporting Big Data pipelines.}, doi = {10.1007/978-3-030-75018-3}, url = {https://link.springer.com/content/pdf/bbm:978-3-030-75018-3/1.pdf} } @Article{Khalid2021, author = {Yasir Noman Khalid and Muhammad Aleem and Usman Ahmed and Radu Prodan and Muhammad Arshad Islam and Muhammad Azhar Iqbal}, journal = {Computing}, title = {{FusionCL: a machine-learning based approach for OpenCL kernel fusion to increase system performance}}, year = {2021}, issn = {1436-5057}, month = jun, pages = {1--32}, abstract = {Employing general-purpose graphics processing units (GPGPU) with the help of OpenCL has resulted in greatly reducing the execution time of data-parallel applications by taking advantage of the massive available parallelism. However, when a small data size application is executed on a GPU, there is a wastage of GPU resources as the application cannot fully utilize the GPU compute-cores. 
There is no mechanism to share a GPU between two kernels due to the lack of operating system support on the GPU. In this paper, we propose the provision of a GPU sharing mechanism between two kernels that increases GPU occupancy and, as a result, reduces the execution time of a job pool. However, if a pair of kernels is competing for the same set of resources (i.e., both applications are compute-intensive or memory-intensive), kernel fusion may also result in a significant increase in the execution time of the fused kernels. Therefore, it is pertinent to select an optimal pair of kernels for fusion that will result in a significant speedup over their serial execution. This research presents FusionCL, a machine learning-based GPU sharing mechanism between a pair of OpenCL kernels. FusionCL identifies the pairs of kernels (from the job pool) that are suitable candidates for fusion using a machine learning-based fusion suitability classifier. Thereafter, from all the candidates, it selects the pair of candidate kernels that will produce the maximum speedup after fusion over their serial execution, using a fusion speedup predictor. The experimental evaluation shows that the proposed kernel fusion mechanism reduces execution time by 2.83× when compared to a baseline scheduling scheme. When compared to the state of the art, the reduction in execution time is up to 8%.}, doi = {10.1007/s00607-021-00958-2}, keywords = {Scheduling, Kernel fusion, High-performance computing, Machine learning}, publisher = {Springer Science and Business Media LLC}, url = {https://link.springer.com/article/10.1007/s00607-021-00958-2} } @InProceedings{Kashansky2021b, author = {Vladislav Kashansky and Radu Prodan and Gleb Radchenko}, booktitle = {9th International Conference "Distributed Computing and Grid Technologies in Science and Education"}, title = {{Some aspects of the workflow scheduling in the computing continuum systems}}, year = {2021}, month = {dec}, pages = {106--110}, publisher = {Crossref}, abstract = {Contemporary computing systems are commonly characterized in terms of data-intensive workflows that are managed by utilizing a large number of heterogeneous computing and storage elements interconnected through complex communication topologies. As the scale of the system grows and workloads become more heterogeneous in both inner structure and arrival patterns, the scheduling problem becomes exponentially harder, requiring problem-specific heuristics. Despite several decades of active research, one issue that still requires effort is enabling efficient workflow scheduling in such complex environments while preserving the robustness of the results. Moreover, a recent research trend coined under the term “computing continuum” prescribes the convergence of multi-scale computational systems with complex spatio-temporal dynamics and diverse sets of management policies. 
This paper contributes a set of recommendations and a brief analysis of the existing scheduling algorithms.}, doi = {10.54546/mlit.2021.29.45.001}, keywords = {scheduling, algorithms, brief review, workflows}, url = {http://ceur-ws.org/Vol-3041/} } @InProceedings{Kashansky2021a, author = {Vladislav Kashansky and Nishant Saurabh and Radu Prodan and Aso Validi and Cristina Olaverri-Monreal and Renate Burian and Gerhard Burian and Dimo Hirsch and Yisheng Lv and Fei-Yue Wang and Hai Zuhge}, booktitle = {Proceedings of the Conference on Information Technology for Social Good (GoodIT 2021)}, title = {{The ADAPT Project: Adaptive and Autonomous Data Performance Connectivity and Decentralized Transport Network}}, year = {2021}, month = {sep}, pages = {115--120}, publisher = {ACM}, abstract = {The ADAPT project started during the most critical phase of the COVID-19 outbreak in Europe, when the demand for Personal Protective Equipment (PPE) from each country’s healthcare system surpassed national stock amounts. Due to national shutdowns, reduced transport logistics, and containment measures on the federal and provincial levels, the authorities could not meet the rising demand from the health care system for PPE equipment. Fortunately, PPE production in China has regained (and expanded) its available capacities, through which Austria can now meet its demand for PPE to protect its citizens. ADAPT develops an adaptive and autonomous decision-making network to support the involved stakeholders along the PPE supply chain to save and protect human lives. The ADAPT decentralized blockchain platform optimizes supply, demand, and transport capacities between China and Austria with transparent, real-time certification checks on equipment, production documentation, and intelligent decision-making capabilities at all levels of this multidimensional logistic problem.}, doi = {10.1145/3462203.3475880}, url = {https://dl.acm.org/doi/10.1145/3462203.3475880} } @InCollection{Kashansky2021, author = {Vladislav Kashansky and Gleb Radchenko and Radu Prodan}, booktitle = {Computational Science (ICCS 2021)}, publisher = {Springer International Publishing}, title = {{Monte Carlo Approach to the Computational Capacities Analysis of the Computing Continuum}}, year = {2021}, month = jun, pages = {779--793}, abstract = {This article proposes an approach to the problem of computational capacities analysis of the computing continuum via a theoretical framework of equilibrium phase transitions and numerical simulations. We introduce the concept of phase transitions in the computing continuum and show how this phenomenon can be explored in the context of workflow makespan, which we treat as an order parameter. We simulate the behavior of the computational network in the equilibrium regime within the framework of the XY-model defined over a complex agent network with Barabasi-Albert topology. More specifically, we define a Hamiltonian over the complex network topology and sample the resulting spin-orientation distribution with the Metropolis-Hastings technique. The key aspect of the paper is the derivation of the bandwidth matrix as the emergent effect of the “low-level” collective spin interaction. This allows us to study the first order approximation to the makespan of the “high-level” system-wide workflow model in the presence of data-flow anisotropy and phase transitions of the bandwidth matrix controlled by means of the “noise regime” parameter η. 
For this purpose, we have built a simulation engine in Python 3.6. Simulation results confirm the existence of the phase transition, revealing complex transformations in the computational abilities of the agents. A notable feature is that the bandwidth distribution undergoes a critical transition from the single- to the multi-mode case. Our simulations generally open new perspectives for reproducible comparative performance analysis of novel and classic scheduling algorithms.}, doi = {10.1007/978-3-030-77961-0_62}, keywords = {Complex networks, Computing continuum, Phase transitions, Computational model, MCMC, Metropolis-Hastings, XY-model, Equilibrium model}, url = {https://link.springer.com/chapter/10.1007/978-3-030-77961-0_62} } @Misc{Kashanskii2021a, author = {Kashanskii, Vladislav and Radchenko, Gleb and Prodan, Radu and Zabrovskiy, Anatoliy and Agrawal, Prateek}, howpublished = {Online Publication (Abstract)}, month = may, title = {{Automated Workflows Scheduling via Two-Phase Event-based MILP Heuristic for MRCPSP Problem}}, year = {2021}, abstract = {In today’s reality, massive amounts of data-intensive tasks are managed by utilizing a large number of heterogeneous computing and storage elements interconnected through high-speed communication networks. However, one issue that still requires research effort is to enable efficient workflow scheduling in such complex environments. As the scale of the system grows and the workloads become more heterogeneous in the inner structure and the arrival patterns, the scheduling problem becomes exponentially harder, requiring problem-specific heuristics. Many techniques evolved to tackle this problem, including, but not limited to, Heterogeneous Earliest Finish Time (HEFT), Dynamic Scaling Consolidation Scheduling (DSCS), Partitioned Balanced Time Scheduling (PBTS), Deadline Constrained Critical Path (DCCP) and Partition Problem-based Dynamic Provisioning Scheduling (PPDPS). In this talk, we will discuss the two-phase heuristic for makespan-optimized assignment of tasks and computing machines on large-scale computing systems, consisting of a matching phase with a subsequent event-based MILP method for schedule generation. We evaluated the scalability of the heuristic using the Constraint Integer Programming (SCIP) solver with various configurations based on data sets provided by the MACS framework. Preliminary results show that the model provides near-optimal assignments and schedules for workflows composed of up to 100 tasks with complex task I/O interactions and demonstrates variable sensitivity with respect to the scale of workflows and resource limitation policies imposed.}, keywords = {HPC Schedule Generation, MRCPSP Problem, Workflows Scheduling, Two-Phase Heuristic}, url = {https://ashpc21.si/booklet-of-abstracts/#dearflip-df_2168/} } @InCollection{Karisch2021, author = {Christof Karisch and Andreas Leibetseder and Klaus Schoeffmann}, booktitle = {MultiMedia Modeling}, publisher = {Springer International Publishing}, title = {{NoShot Video Browser at VBS2021}}, year = {2021}, month = jan, number = {12573}, pages = {405--409}, abstract = {We present our NoShot Video Browser, which has been successfully used at the last Video Browser Showdown competition, VBS2020 at MMM2020. NoShot is given its name due to the fact that it neither makes use of any kind of shot detection nor utilizes the VBS master shots. Instead, videos are split into frames with a time distance of one second. 
The biggest strength of the system lies in its feature “time cache”, which shows results with the best confidence in a range of seconds.}, doi = {10.1007/978-3-030-67835-7_36}, keywords = {Video retrieval, Interactive video search, Video analysis}, url = {https://link.springer.com/chapter/10.1007/978-3-030-67835-7_36} } @Article{Karandikar_2021, author = {Nikita Karandikar and Rockey Abhishek and Nishant Saurabh and Zhiming Zhao and Alexander Lercher and Ninoslav Marina and Radu Prodan and Chunming Rong and Antorweep Chakravorty}, journal = {Blockchain: Research and Applications}, title = {{Blockchain-based prosumer incentivization for peak mitigation through temporal aggregation and contextual clustering}}, year = {2021}, issn = {2096-7209}, month = jun, pages = {1--35}, abstract = {Peak mitigation is of interest to power companies as peak periods may require the operator to over-provision supply in order to meet the peak demand. Flattening the usage curve can result in cost savings, both for the power companies and the end users. Integration of renewable energy into the energy infrastructure presents an opportunity to use excess renewable generation to supplement supply and alleviate peaks. In addition, demand side management can shift the usage from peak to off-peak times and reduce the magnitude of peaks. In this work, we present a data-driven approach for incentive-based peak mitigation. Understanding user energy profiles is an essential step in this process. We begin by analysing a popular energy research dataset published by the Ausgrid corporation. Extracting aggregated user energy behavior in temporal contexts, semantic linking, and contextual clustering give us insight into consumption and rooftop solar generation patterns. We implement and performance-test a blockchain-based prosumer incentivization system. The smart contract logic is based on our analysis of the Ausgrid dataset. Our implementation is capable of supporting 792,540 customers with a reasonably low infrastructure footprint.}, doi = {10.1016/j.bcra.2021.100016}, keywords = {Peak shaving, aggregation analysis, contextual clustering, blockchain, incentivization}, publisher = {Elsevier (BV)}, url = {https://www.sciencedirect.com/science/article/pii/S2096720921000117?via=ihub} } @Article{Jha2021, author = {Debesh Jha and Sharib Ali and Steven Hicks and Vajira Thambawita and Hanna Borgli and Pia H. Smedsrud and Thomas de Lange and Konstantin Pogorelov and Xiaowei Wang and Philipp Harzig and Minh-Triet Tran and Wenhua Meng and Trung-Hieu Hoang and Danielle Dias and Tobey H. Ko and Taruna Agrawal and Olga Ostroukhova and Zeshan Khan and Muhammad Atif Tahir and Yang Liu and Yuan Chang and Mathias Kirkerod and Dag Johansen and Mathias Lux and Haavard D. Johansen and Michael A. Riegler and Paal Halvorsen}, journal = {Medical Image Analysis}, title = {{A comprehensive analysis of classification methods in gastrointestinal endoscopy imaging}}, year = {2021}, issn = {1361-8415}, month = {may}, pages = {102007}, volume = {70}, abstract = {Gastrointestinal (GI) endoscopy has been an active field of research motivated by the large number of highly lethal GI cancers. Early GI cancer precursors are often missed during endoscopic surveillance. The high miss rate of such abnormalities during endoscopy is thus a critical bottleneck. Lack of attentiveness due to tiring procedures and the requirement of training are a few contributing factors. 
An automatic GI disease classification system can help reduce such risks by flagging suspicious frames and lesions. GI endoscopy consists of surveillance of several organs; therefore, there is a need to develop methods that can generalize to various endoscopic findings. In this realm, we present a comprehensive analysis of the Medico GI challenges: Medical Multimedia Task at MediaEval 2017, Medico Multimedia Task at MediaEval 2018, and BioMedia ACM MM Grand Challenge 2019. These challenges are an initiative to set up a benchmark for different computer vision methods applied to multi-class endoscopic images and to promote the development of new approaches that could reliably be used in clinics. We report the performance of 21 participating teams over a period of three consecutive years and provide a detailed analysis of the methods used by the participants, highlighting the challenges and shortcomings of the current approaches and dissecting their credibility for use in clinical settings. Our analysis revealed that the participants achieved an improvement in the maximum Matthews correlation coefficient (MCC) from 82.68% in 2017 to 93.98% in 2018 and 95.20% in 2019, and a significant increase in computational speed over consecutive years.}, doi = {10.1016/j.media.2021.102007}, keywords = {Gastrointestinal endoscopy challenges, Artificial intelligence, Computer-aided detection and diagnosis, Medical imaging, Medico Task 2017, Medico Task 2018, BioMedia 2019 grand challenge}, publisher = {Elsevier (BV)}, url = {https://www.sciencedirect.com/science/article/pii/S1361841521000530?via=ihub} } @InProceedings{Hellwagner2021, author = {Antonia Stornig and Aymen Fakhreddine and Hellwagner, Hermann and Petar Popovski and Christian Bettstetter}, booktitle = {2021 IEEE 93rd Vehicular Technology Conference (VTC2021-Spring)}, title = {{Video Quality and Latency for UAV Teleoperation over {LTE}: A Study with ns3}}, year = {2021}, month = {apr}, pages = {1--7}, publisher = {IEEE}, abstract = {Teleoperation of an unmanned aerial vehicle (UAV) is a challenging mobile application with real-time control from a first-person view. It poses stringent latency requirements for both video and control traffic. This paper studies the video quality and latencies for UAV teleoperation over LTE using ns3 simulations. A key ingredient is the latency budget model. We observe that the latency of the video is higher and more sensitive to mobility than that of the control traffic. The latency is influenced by the traffic variation caused by the variable bit rate of the streaming application. High mobility tends to increase latency and lead to more outliers, being problematic in real-time control.}, doi = {10.1109/vtc2021-spring51267.2021.9448676}, keywords = {Drone, teleoperation, video streaming, ns3, simulation, QoS, QoE}, url = {https://ieeexplore.ieee.org/document/9448676} } @Article{Hayat2021, author = {Samira Hayat and Roland Jung and Hellwagner, Hermann and Christian Bettstetter and Driton Emini and Dominik Schnieders}, journal = {IEEE Robotics and Automation Letters}, title = {{Edge Computing in 5G for Drone Navigation: What to Offload?}}, year = {2021}, issn = {2377-3766}, month = {apr}, number = {2}, pages = {2571--2578}, volume = {6}, abstract = {Small drones that navigate using cameras may be limited in their speed and agility by low onboard computing power. We evaluate the role of edge computing in 5G for such autonomous navigation. 
The offloading of image processing tasks to an edge server is studied with a vision-based navigation algorithm. Three computation modes are compared: onboard, fully offloaded to the edge, and partially offloaded. Partial offloading is expected to pose lower demands on the communication network in terms of transfer rate than full offloading but requires some onboard processing. Our results on the computation time help select the most suitable mode for image processing, i.e., whether and what to offload, based on the network conditions.}, doi = {10.1109/lra.2021.3062319}, keywords = {Aerial systems, autonomous vehicle navigation, perception and autonomy, vision-based navigation}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9363523} } @InCollection{Ghamsarian2021b, author = {Negin Ghamsarian and Mario Taschwer and Doris Putzgruber-Adamitsch and Stephanie Sarny and Yosuf El-Shabrawi and Klaus Schöffmann}, booktitle = {Neural Information Processing}, publisher = {Springer International Publishing}, title = {{ReCal-Net: Joint Region-Channel-Wise Calibrated Network for Semantic Segmentation in Cataract Surgery Videos}}, year = {2021}, month = dec, number = {13110}, pages = {391--402}, abstract = {Semantic segmentation in surgical videos is a prerequisite for a broad range of applications towards improving surgical outcomes and surgical video analysis. However, semantic segmentation in surgical videos involves many challenges. In particular, in cataract surgery, various features of the relevant objects such as blunt edges, color and context variation, reflection, transparency, and motion blur pose a challenge for semantic segmentation. In this paper, we propose a novel convolutional module termed as ReCal module, which can calibrate the feature maps by employing region intra-and-inter-dependencies and channel-region cross-dependencies. This calibration strategy can effectively enhance semantic representation by correlating different representations of the same semantic label, considering a multi-angle local view centering around each pixel. Thus the proposed module can deal with distant visual characteristics of unique objects as well as cross-similarities in the visual characteristics of different objects. Moreover, we propose a novel network architecture based on the proposed module termed as ReCal-Net. Experimental results confirm the superiority of ReCal-Net compared to rival state-of-the-art approaches for all relevant objects in cataract surgery. Moreover, ablation studies reveal the effectiveness of the ReCal module in boosting semantic segmentation accuracy.}, doi = {10.1007/978-3-030-92238-2_33}, keywords = {Cataract surgery, Semantic segmentation, Feature map calibration}, url = {https://link.springer.com/chapter/10.1007/978-3-030-92238-2_33} } @InCollection{Ghamsarian2021a, author = {Negin Ghamsarian and Mario Taschwer and Doris Putzgruber-Adamitsch and Stephanie Sarny and Yosuf El-Shabrawi and Klaus Schoeffmann}, booktitle = {Medical Image Computing and Computer Assisted Intervention (MICCAI 2021)}, publisher = {Springer International Publishing}, title = {{LensID: A CNN-RNN-Based Framework Towards Lens Irregularity Detection in Cataract Surgery Videos}}, year = {2021}, month = sep, number = {12908}, pages = {76--86}, abstract = {A critical complication after cataract surgery is the dislocation of the lens implant leading to vision deterioration and eye trauma. 
In order to reduce the risk of this complication, it is vital to discover the risk factors during the surgery. However, studying the relationship between lens dislocation and its suspicious risk factors using numerous videos is a time-extensive procedure. Hence, the surgeons demand an automatic approach to enable a larger-scale and, accordingly, more reliable study. In this paper, we propose a novel framework as the major step towards lens irregularity detection. In particular, we propose (I) an end-to-end recurrent neural network to recognize the lens-implantation phase and (II) a novel semantic segmentation network to segment the lens and pupil after the implantation phase. The phase recognition results reveal the effectiveness of the proposed surgical phase recognition approach. Moreover, the segmentation results confirm the proposed segmentation network’s effectiveness compared to state-of-the-art rival approaches.}, doi = {10.1007/978-3-030-87237-3_8}, keywords = {Semantic segmentation, Surgical phase recognition, Cataract surgery}, url = {https://link.springer.com/chapter/10.1007/978-3-030-87237-3_8} } @InProceedings{Ghamsarian2021, author = {Negin Ghamsarian and Mario Taschwer and Doris Putzgruber-Adamitsch and Stephanie Sarny and Klaus Schoeffmann}, booktitle = {2020 25th International Conference on Pattern Recognition (ICPR)}, title = {{Relevance Detection in Cataract Surgery Videos by Spatio- Temporal Action Localization}}, year = {2021}, month = {jan}, pages = {10720--10727}, publisher = {IEEE}, abstract = {In cataract surgery, the operation is performed with the help of a microscope. Since the microscope enables watching real-time surgery by up to two people only, a major part of surgical training is conducted using the recorded videos. To optimize the training procedure with the video content, the surgeons require an automatic relevance detection approach. In addition to relevance-based retrieval, these results can be further used for skill assessment and irregularity detection in cataract surgery videos. In this paper, a three-module framework is proposed to detect and classify the relevant phase segments in cataract videos. Taking advantage of an idle frame recognition network, the video is divided into idle and action segments. To boost the performance in relevance detection, the cornea where the relevant surgical actions are conducted is detected in all frames using Mask R-CNN. The spatiotemporally localized segments containing higher-resolution information about the pupil texture and actions, and complementary temporal information from the same phase are fed into the relevance detection module. This module consists of four parallel recurrent CNNs being responsible to detect four relevant phases that have been defined with medical experts. The results will then be integrated to classify the action phases as irrelevant or one of four relevant phases. 
Experimental results reveal that the proposed approach outperforms static CNNs and different configurations of feature-based and end-to-end recurrent networks.}, doi = {10.1109/icpr48806.2021.9412525}, url = {https://ieeexplore.ieee.org/document/9412525} } @InProceedings{Farahani2021b, author = {Reza Farahani}, booktitle = {Proceedings of the 12th ACM Multimedia Systems Conference}, title = {{CDN and SDN Support and Player Interaction for HTTP Adaptive Video Streaming}}, year = {2021}, month = {jun}, pages = {398--402}, publisher = {ACM}, abstract = {Video streaming has become one of the most prevailing, bandwidth-hungry, and latency-sensitive Internet applications. HTTP Adaptive Streaming (HAS) has become the dominant video delivery mechanism over the Internet. Lack of coordination among the clients and lack of awareness of the network in pure client-based adaptive video bitrate approaches have caused problems, such as sub-optimal data throughput from Content Delivery Network (CDN) or origin servers, high CDN costs, and non-satisfactory users' experience. Recent studies have shown that network-assisted HAS techniques by utilizing modern networking paradigms, e.g., Software Defined Networking (SDN), Network Function Virtualization(NFV), and edge computing can significantly improve HAS system performance. In this doctoral study, we leverage the aforementioned modern networking paradigms and design network-assistance for/by HAS clients to improve HAS systems performance and CDN/network utilization. We present four fundamental research questions to target different challenges in devising a network-assisted HAS system.}, doi = {10.1145/3458305.3478464}, url = {https://dl.acm.org/doi/abs/10.1145/3458305.3478464} } @InProceedings{Farahani2021a, author = {Reza Farahani and Farzad Tashtarian and Hadi Amirpour and Christian Timmerer and Mohammad Ghanbari and Hellwagner, Hermann}, booktitle = {2021 IEEE 46th Conference on Local Computer Networks (LCN)}, title = {{CSDN: CDN-Aware QoE Optimization in SDN-Assisted HTTP Adaptive Video Streaming}}, year = {2021}, month = {oct}, pages = {525--532}, publisher = {IEEE}, abstract = {Recent studies have revealed that network-assisted techniques, by providing a comprehensive view of the network, improve HTTP Adaptive Streaming (HAS) system performance significantly. This paper leverages the capability of Software-Defined Networking, Network Function Virtualization, and edge computing to introduce a CDN-Aware QoE Optimization in SDN-Assisted Adaptive Video Streaming (CSDN) framework. We employ virtualized edge entities to collect various information items and run an optimization model with a new server/segment selection approach in a time-slotted fashion to serve the clients’ requests by selecting optimal cache servers. In case of a cache miss, a client’s request is served by an optimal replacement quality from a cache server, by a quality transcoded from an optimal replacement quality at the edge, or by the originally requested quality from the origin server. 
Comprehensive experiments conducted on a large-scale testbed demonstrate that CSDN outperforms other approaches in terms of the users’ QoE and network utilization.}, doi = {10.1109/lcn52139.2021.9524970}, keywords = {Dynamic Adaptive Streaming over HTTP (DASH), Edge Computing, Network-Assisted Video Streaming, Quality of Experience (QoE), Software Defined Networking (SDN), Network Function Virtualization (NFV), Video Transcoding, Content Delivery Network (CDN)}, url = {https://ieeexplore.ieee.org/document/9524970} } @InProceedings{Farahani2021, author = {Reza Farahani and Farzad Tashtarian and Alireza Erfanian and Christian Timmerer and Mohammad Ghanbari and Hellwagner, Hermann}, booktitle = {Proceedings of the 31st ACM Workshop on Network and Operating Systems Support for Digital Audio and Video}, title = {{ES-HAS: an edge- and SDN-assisted framework for HTTP adaptive video streaming}}, year = {2021}, month = {jul}, pages = {50--57}, publisher = {ACM}, abstract = {Recently, HTTP Adaptive Streaming (HAS) has become the dominant video delivery technology over the Internet. In HAS, clients have full control over the media streaming and adaptation processes. Lack of coordination among the clients and lack of awareness of the network conditions may lead to sub-optimal user experience and resource utilization in a pure client-based HAS adaptation scheme. Software Defined Networking (SDN) has recently been considered to enhance the video streaming process. In this paper, we leverage the capability of SDN and Network Function Virtualization (NFV) to introduce an edge- and SDN-assisted video streaming framework called ES-HAS. We employ virtualized edge components to collect HAS clients' requests and retrieve networking information in a time-slotted manner. These components then perform an optimization model in a time-slotted manner to efficiently serve clients' requests by selecting an optimal cache server (with the shortest fetch time). In case of a cache miss, a client's request is served (i) by an optimal replacement quality (only better quality levels with minimum deviation) from a cache server, or (ii) by the original requested quality level from the origin server. This approach is validated through experiments on a large-scale testbed, and the performance of our framework is compared to pure client-based strategies and the SABR system [12]. Although SABR and ES-HAS show (almost) identical performance in the number of quality switches, ES-HAS outperforms SABR in terms of playback bitrate and the number of stalls by at least 70% and 40%, respectively.}, doi = {10.1145/3458306.3460997}, keywords = {Dynamic Adaptive Streaming over HTTP (DASH), Edge Computing, Network-Assisted Video Streaming, Quality of Experience (QoE), Software Defined Networking (SDN), Network Function Virtualization (NFV)}, url = {https://dl.acm.org/doi/10.1145/3458306.3460997} } @InProceedings{Erfanian2021c, author = {Alireza Erfanian}, booktitle = {Proceedings of the 12th ACM Multimedia Systems Conference}, title = {{Optimizing QoE and Latency of Live Video Streaming Using Edge Computing and In-Network Intelligence}}, year = {2021}, month = {jun}, pages = {373--377}, publisher = {ACM}, abstract = {Live video streaming traffic and related applications have experienced significant growth in recent years. More users have started generating and delivering live streams with high quality (e.g., 4K resolution) through popular online streaming platforms such as YouTube, Twitch, and Facebook. 
Typically, the video contents are generated by streamers and watched by many audiences, which are geographically distributed in various locations far away from the streamers' locations. The resource limitation in the network (e.g., bandwidth) is a challenging issue for network and video providers to meet the users' requested quality. In this thesis, we will investigate optimizing QoE and end-to-end (E2E) latency of live video streaming by leveraging edge computing capabilities and in-network intelligence. We present four main research questions aiming to address the various challenges in optimizing live streaming QoE and E2E latency by employing edge computing and in-network intelligence.}, doi = {10.1145/3458305.3478459}, url = {https://dl.acm.org/doi/10.1145/3458305.3478459} } @InProceedings{Erfanian2021b, author = {Alireza Erfanian and Hadi Amirpour and Farzad Tashtarian and Christian Timmerer and Hellwagner, Hermann}, booktitle = {Proceedings of the Workshop on Design, Deployment, and Evaluation of Network-assisted Video Streaming}, title = {{LwTE-Live: Light-weight Transcoding at the Edge for Live Streaming}}, year = {2021}, month = {dec}, pages = {22--28}, publisher = {ACM}, abstract = {Live video streaming is widely embraced in video services, and its applications have attracted much attention in recent years. The increased number of users demanding high quality (e.g., 4K resolution) live videos increases the bandwidth utilization in the backhaul network. To decrease bandwidth utilization in HTTP Adaptive Streaming (HAS), in on-the-fly transcoding approaches, only the highest bitrate representation is delivered to the edge, and other representations are generated by transcoding at the edge. However, this approach is inefficient due to the high transcoding cost. In this paper, we propose a light-weight transcoding at the edge method for live applications, LwTE-Live, to decrease the bandwidth utilization and the overall live streaming cost. During the encoding processes at the origin server, the optimal encoding decisions are saved as metadata and the metadata replaces the corresponding representation in the bitrate ladder. The significantly reduced size of the metadata compared to its corresponding representation decreases the bandwidth utilization. The extracted metadata is then utilized at the edge to decrease the transcoding time. We formulate the problem as a Mixed-Binary Linear Programming (MBLP) model to optimize the live streaming cost, including the bandwidth and computation costs. We compare the proposed model with state-of-the-art approaches, and the experimental results show that our proposed method saves the cost and backhaul bandwidth utilization up to 34% and 45%, respectively.}, doi = {10.1145/3488662.3493829}, url = {https://dl.acm.org/doi/10.1145/3488662.3493829} } @Article{Erfanian2021a, author = {Alireza Erfanian and Hadi Amirpour and Farzad Tashtarian and Christian Timmerer and Hellwagner, Hermann}, journal = {IEEE Access}, title = {{LwTE: Light-Weight Transcoding at the Edge}}, year = {2021}, issn = {2169-3536}, month = aug, pages = {112276--112289}, volume = {9}, abstract = {Due to the growing demand for video streaming services, providers have to deal with increasing resource requirements for increasingly heterogeneous environments. 
To mitigate this problem, many works have been proposed which aim to (i) improve cloud/edge caching efficiency, (ii) use computation power available in the cloud/edge for on-the-fly transcoding, and (iii) optimize the trade-off among various cost parameters, e.g., storage, computation, and bandwidth. In this paper, we propose LwTE, a novel Light-weight Transcoding approach at the Edge, in the context of HTTP Adaptive Streaming (HAS). During the encoding process of a video segment at the origin side, computationally intensive search processes are carried out. The main idea of LwTE is to store the optimal results of these search processes as metadata for each video bitrate and reuse them at the edge servers to reduce the required time and computational resources for on-the-fly transcoding. LwTE enables us to store only the highest bitrate plus the corresponding metadata (of very small size) for unpopular video segments/bitrates. In this way, in addition to the significant reduction in bandwidth and storage consumption, the required time for on-the-fly transcoding of a requested segment is remarkably decreased by utilizing its corresponding metadata; unnecessary search processes are avoided. Popular video segments/bitrates remain stored. We investigate our approach for Video-on-Demand (VoD) streaming services by optimizing storage and computation (transcoding) costs at the edge servers and then compare it to conventional methods (store all bitrates, partial transcoding). The results indicate that our approach reduces the transcoding time by at least 80% and decreases the aforementioned costs by 12% to 70% compared to the state-of-the-art approaches.}, doi = {10.1109/access.2021.3102633}, keywords = {Video Streaming, transcoding, video on demand, edge computing}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9507473} } @Article{Erfanian2021, author = {Alireza Erfanian and Farzad Tashtarian and Anatoliy Zabrovskiy and Christian Timmerer and Hellwagner, Hermann}, journal = {IEEE Transactions on Network and Service Management}, title = {{OSCAR: On Optimizing Resource Utilization in Live Video Streaming}}, year = {2021}, issn = {1932-4537}, month = {mar}, number = {1}, pages = {552--569}, volume = {18}, abstract = {Live video streaming traffic and related applications have experienced significant growth in recent years. However, this has been accompanied by some challenging issues, especially in terms of resource utilization. Although IP multicasting can be recognized as an efficient mechanism to cope with these challenges, it suffers from many problems. Applying software-defined networking (SDN) and network function virtualization (NFV) technologies enables researchers to cope with IP multicasting issues in novel ways. In this article, by leveraging the SDN concept, we introduce OSCAR (Optimizing reSourCe utilizAtion in live video stReaming) as a new cost-aware video streaming approach to provide advanced video coding (AVC)-based live streaming services in the network. In this article, we use two types of virtualized network functions (VNFs): virtual reverse proxy (VRP) and virtual transcoder function (VTF). At the edge of the network, VRPs are responsible for collecting clients’ requests and sending them to an SDN controller. Then, by executing a mixed-integer linear program (MILP), the SDN controller determines a group of optimal multicast trees for streaming the requested videos from an appropriate origin server to the VRPs. 
Moreover, to elevate the efficiency of resource allocation and meet the given end-to-end latency threshold, OSCAR delivers only the highest requested quality from the origin server to an optimal group of VTFs over a multicast tree. The selected VTFs then transcode the received video segments and transmit them to the requesting VRPs in a multicast fashion. To mitigate the time complexity of the proposed MILP model, we present a simple and efficient heuristic algorithm that determines a near-optimal solution in polynomial time. Using the MiniNet emulator, we evaluate the performance of OSCAR in various scenarios. The results show that OSCAR surpasses other SVC- and AVC-based multicast and unicast approaches in terms of cost and resource utilization.}, doi = {10.1109/tnsm.2021.3051950}, keywords = {Dynamic adaptive streaming over HTTP (DASH), live video streaming, software defined networking (SDN), video transcoding, network function virtualization (NFV)}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9327491} } @InCollection{Elmenreich2021, author = {Wilfried Elmenreich and Mathias Lux}, booktitle = {A Ludic Society}, publisher = {Donau-Universität Krems}, title = {{Analyzing Usage Patterns in Online Games}}, year = {2021}, month = sep, pages = {347--359}, abstract = {A typical life cycle of an online game is reflected in its usage patterns. A game first builds a user base, then reaches an absolute peak, and is finally played by a minimum number of dedicated fans at the end of its life. Apart from this development, extraordinary internal and external events can be observed as changes in usage in games, especially multiplayer and massive multiplayer ones. The COVID-19 pandemic has impacted the usage of video games, as it has the game business itself. However, research lacks data to investigate these relations further. Usage statistics of games are rarely accessible for researchers. In this paper, we relate usage statistics to viewership and popularity of a game using available data sources like online statistics or activity on Twitch.tv. In a first study, data from the massively multiplayer online role-playing game (MMORPG) Eternal Lands is analyzed. Eternal Lands is a free multiplayer online game that was created back in 2002. The usage patterns show day/night cycles of players in the prime time of the time zones where most players are located and increased playing activity on weekends. A general trend over time shows a slowly diminishing user base over the years since its introduction. In April 2020, a significant rise in user activities can be observed, attributed to lockdowns in many countries due to the COVID-19 pandemic. This can be attributed to regular players investing more time in the game during the lockdown and to new or recurring players who had not played the game intensively before and were looking for a distraction during the lockdown. In a second study, we focus on complementary viewer statistics on the popular game streaming platform Twitch.tv. We can observe that the COVID-19 pandemic impacted the playing time, as mentioned earlier. We relate usage data to viewership and streaming statistics of popular games. 
With the example of Eternal Lands, being a game that never went viral, we discuss the possibility of approximating a game's popularity through game streaming and viewership.}, url = {https://mobile.aau.at/publications/elmenreich_lux_2021_Analyzing_Usage_Patterns_in_Online_Games.pdf} } @InProceedings{Cetinkaya2021b, author = {Ekrem Cetinkaya}, booktitle = {Proceedings of the 12th ACM Multimedia Systems Conference}, title = {{Machine Learning Based Video Coding Enhancements for HTTP Adaptive Streaming}}, year = {2021}, month = {jun}, pages = {418--422}, publisher = {ACM}, abstract = {Video traffic comprises the majority of today's Internet traffic, and HTTP Adaptive Streaming (HAS) is the preferred method to deliver video content over the Internet. Increasing demand for video and the improvements in the video display conditions over the years caused an increase in the video coding complexity. This increased complexity brought the need for more efficient video streaming and coding solutions. The latest standard video codecs can reduce the size of the videos by using more efficient tools with higher time-complexities. The plans for integrating machine learning into upcoming video codecs raised the interest in applied machine learning for video coding. In this doctoral study, we aim to propose applied machine learning methods to video coding, focusing on HTTP adaptive streaming. We present four primary research questions to target different challenges in video coding for HTTP adaptive streaming.}, doi = {10.1145/3458305.3478468}, url = {https://dl.acm.org/doi/10.1145/3458305.3478468} } @Article{Cetinkaya2021a, author = {Ekrem Cetinkaya and Hadi Amirpour and Christian Timmerer and Mohammad Ghanbari}, journal = {IEEE Open Journal of Signal Processing}, title = {{Fast Multi-Resolution and Multi-Rate Encoding for HTTP Adaptive Streaming Using Machine Learning}}, year = {2021}, issn = {2644-1322}, month = jun, pages = {1--12}, abstract = {Video streaming applications keep getting more attention over the years, and HTTP Adaptive Streaming (HAS) became the de-facto solution for video delivery over the Internet. In HAS, each video is encoded at multiple quality levels and resolutions (i.e., representations) to enable adaptation of the streaming session to viewing and network conditions of the client. This requirement brings encoding challenges along with it, e.g., a video source should be encoded efficiently at multiple bitrates and resolutions. Fast multi-rate encoding approaches aim to address this challenge of encoding multiple representations from a single video by re-using information from already encoded representations. In this paper, a convolutional neural network is used to speed up both multi-rate and multi-resolution encoding for HAS. For multi-rate encoding, the lowest bitrate representation is chosen as the reference. For multi-resolution encoding, the highest bitrate from the lowest resolution representation is chosen as the reference. Pixel values from the target resolution and encoding information from the reference representation are used to predict Coding Tree Unit (CTU) split decisions in High-Efficiency Video Coding (HEVC) for dependent representations. Experimental results show that the proposed method for multi-rate encoding can reduce the overall encoding time by 15.08 % and parallel encoding time by 41.26 %, with a 0.89 % bitrate increase compared to the HEVC reference software. 
Simultaneously, the proposed method for multi-resolution encoding can reduce the encoding time by 46.27 % for the overall encoding and 27.71 % for the parallel encoding on average with a 2.05 % bitrate increase.}, doi = {10.1109/ojsp.2021.3078657}, keywords = {HTTP Adaptive Streaming, HEVC, Multirate Encoding, Machine Learning}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9427195} } @Article{Cetinkaya2021, author = {Ekrem Cetinkaya and Hadi Amirpour and Mohammad Ghanbari and Christian Timmerer}, journal = {Signal Processing: Image Communication}, title = {{CTU depth decision algorithms for HEVC: A survey}}, year = {2021}, issn = {0923-5965}, month = {nov}, pages = {116442}, volume = {99}, abstract = {High Efficiency Video Coding (HEVC) surpasses its predecessors in encoding efficiency by introducing new coding tools at the cost of an increased encoding time-complexity. The Coding Tree Unit (CTU) is the main building block used in HEVC. In the HEVC standard, frames are divided into CTUs with the predetermined size of up to 64 × 64 pixels. Each CTU is then divided recursively into a number of equally sized square areas, known as Coding Units (CUs). Although this diversity of frame partitioning increases encoding efficiency, it also causes an increase in the time complexity due to the increased number of ways to find the optimal partitioning. To address this complexity, numerous algorithms have been proposed to eliminate unnecessary searches during partitioning CTUs by exploiting the correlation in the video. In this paper, existing CTU depth decision algorithms for HEVC are surveyed. These algorithms are categorized into two groups, namely statistics and machine learning approaches. Statistics approaches are further subdivided into neighboring and inherent approaches. Neighboring approaches exploit the similarity between adjacent CTUs to limit the depth range of the current CTU, while inherent approaches use only the available information within the current CTU. Machine learning approaches try to extract and exploit similarities implicitly. Traditional methods like support vector machines or random forests use manually selected features, while recently proposed deep learning methods extract features during training. Finally, this paper discusses extending these methods to more recent video coding formats such as Versatile Video Coding (VVC) and AOMedia Video 1(AV1).}, doi = {10.1016/j.image.2021.116442}, keywords = {HEVC, Coding tree unit, Complexity, CTU partitioning, Statistics, Machine learning}, publisher = {Elsevier BV}, url = {https://www.sciencedirect.com/science/article/pii/S0923596521002113} } @InProceedings{Barcis2021a, author = {Michal Barcis and Hellwagner, Hermann}, booktitle = {2021 Wireless Days (WD)}, title = {{Information Distribution in Multi-Robot Systems: Adapting to Varying Communication Conditions}}, year = {2021}, month = {jun}, pages = {1--8}, publisher = {IEEE}, abstract = {This work addresses the problem of application-layer congestion control in multi-robot systems (MRS). It is motivated by the fact that many MRS constrain the amount of transmitted data in order to avoid congestion in the network and ensure that critical messages get delivered. However, such constraints often need to be manually tuned and assume constant network capabilities. We introduce the adaptive goodput constraint, which smoothly adapts to varying communication conditions. 
It is suitable for long-term communication planning, where rapid changes are undesirable. We analyze the introduced method in a simulation-based study and show its practical applicability using mobile robots.}, doi = {10.1109/wd52248.2021.9508324}, keywords = {Wireless communication, Adaptation models, Adaptive systems, Limiting, Control systems, Data models, Planning}, url = {https://ieeexplore.ieee.org/document/9508324} } @Article{Barcis2021, author = {Michal Barcis and Agata Barcis and Nikolaos Tsiogkas and Hellwagner, Hermann}, journal = {Frontiers in Robotics and AI}, title = {{Information Distribution in Multi-Robot Systems: Generic, Utility-Aware Optimization Middleware}}, year = {2021}, issn = {2296-9144}, month = {jul}, pages = {1--11}, volume = {8}, abstract = {This work addresses the problem of what information is worth sending in a multi-robot system under generic constraints, e.g., limited throughput or energy. Our decision method is based on Monte Carlo Tree Search. It is designed as a transparent middleware that can be integrated into existing systems to optimize communication among robots. Furthermore, we introduce techniques to reduce the decision space of this problem to further improve the performance. We evaluate our approach using a simulation study and demonstrate its feasibility in a real-world environment by realizing a proof of concept in ROS 2 on mobile robots.}, doi = {10.3389/frobt.2021.685105}, keywords = {multi-robot systems, information distribution, adaptive communication, information utility, communication optimization, Monte Carlo tree search}, publisher = {Frontiers Media (SA)}, url = {https://www.frontiersin.org/articles/10.3389/frobt.2021.685105/full} } @InCollection{Amirpourazarian2021b, author = {Hadi Amirpour and Ekrem Cetinkaya and Christian Timmerer and Mohammad Ghanbari}, booktitle = {Proceedings of the 27th International Conference on Multimedia Modeling (MMM 2021)}, publisher = {Springer International Publishing}, title = {{Towards Optimal Multirate Encoding for {HTTP} Adaptive Streaming}}, year = {2021}, month = jan, number = {12572}, pages = {469--480}, series = {Lecture Notes in Computer Science}, abstract = {HTTP Adaptive Streaming (HAS) enables high quality streaming of video contents. In HAS, videos are divided into short intervals called segments, and each segment is encoded at various quality/bitrates to adapt to the available bandwidth. Multiple encodings of the same content impose a high cost for video content providers. To reduce the time-complexity of encoding multiple representations, state-of-the-art methods typically encode the highest quality representation first and reuse the information gathered during its encoding to accelerate the encoding of the remaining representations. As encoding the highest quality representation requires the highest time-complexity compared to the lower quality representations, it would be a bottleneck in parallel encoding scenarios and the overall time-complexity will be limited to the time-complexity of the highest quality representation. In this paper, to address this problem, we consider all representations from the highest to the lowest quality representation as a potential, single reference to accelerate the encoding of the other, dependent representations. We formulate a set of encoding modes and assess their performance in terms of BD-Rate and time-complexity, using both VMAF and PSNR as objective metrics.
Experimental results show that encoding a middle quality representation as a reference can significantly reduce the maximum encoding complexity, and hence it is an efficient way of encoding multiple representations in parallel. Based on this fact, a fast multirate encoding method is proposed which utilizes the depth and prediction mode of a middle quality representation to accelerate the encoding of the dependent representations.}, doi = {10.1007/978-3-030-67832-6_38}, keywords = {HEVC, Video Encoding, Multirate Encoding, DASH}, url = {https://link.springer.com/chapter/10.1007/978-3-030-67832-6_38} } @InProceedings{Amirpourazarian2021a, author = {Hadi Amirpourazarian and Christian Timmerer and Mohammad Ghanbari}, booktitle = {2021 Data Compression Conference (DCC)}, title = {{SLFC: Scalable Light Field Coding}}, year = {2021}, month = {mar}, pages = {43--52}, publisher = {IEEE}, abstract = {Light field imaging enables some post-processing capabilities like refocusing, changing view perspective, and depth estimation. As light field images are represented by multiple views, they contain a huge amount of data that makes compression inevitable. Although there are some proposals to efficiently compress light field images, their main focus is on encoding efficiency. However, some important functionalities such as viewpoint and quality scalabilities, random access, and uniform quality distribution have not been addressed adequately. In this paper, an efficient light field image compression method based on a deep neural network is proposed, which classifies multiple views into various layers. In each layer, the target view is synthesized from the available views of previously encoded/decoded layers using a deep neural network. This synthesized view is then used as a virtual reference for the target view inter-coding. In this way, random access to an arbitrary view is provided. Moreover, uniform quality distribution among multiple views is addressed. In higher bitrates where random access to an arbitrary view is more crucial, the required bitrate to access the requested view is minimized.}, doi = {10.1109/dcc50243.2021.00012}, keywords = {Light field, Compression, Scalable, Random Access}, url = {https://ieeexplore.ieee.org/document/9418753} } @InProceedings{Amirpourazarian2021, author = {Hadi Amirpourazarian and Christian Timmerer and Mohammad Ghanbari}, booktitle = {2021 IEEE International Conference on Multimedia and Expo (ICME)}, title = {{PSTR: Per-Title Encoding Using Spatio-Temporal Resolutions}}, year = {2021}, month = jun, pages = {1--6}, publisher = {IEEE}, abstract = {Current per-title encoding schemes encode the same video content (or snippets/subsets thereof) at various bitrates and spatial resolutions to find an optimal bitrate ladder for each video content. Compared to traditional approaches, in which a predefined, content-agnostic ("fit-to-all") encoding ladder is applied to all video contents, per-title encoding can result in (i) a significant decrease of storage and delivery costs and (ii) an increase in the Quality of Experience (QoE). In the current per-title encoding schemes, the bitrate ladder is optimized using only spatial resolutions, while we argue that with the emergence of high framerate videos, this principle can be extended to temporal resolutions as well. In this paper, we improve the per-title encoding for each content using spatio-temporal resolutions.
Experimental results show that our proposed approach doubles the performance of bitrate saving by considering both temporal and spatial resolutions compared to considering only spatial resolutions.}, doi = {10.1109/icme51207.2021.9428247}, keywords = {Bitrate ladder, per-title encoding, framerate, spatial resolution}, url = {https://ieeexplore.ieee.org/document/9428247} } @InProceedings{Amirpour2021b, author = {Hadi Amirpour and Raimund Schatz and Christian Timmerer and Mohammad Ghanbari}, booktitle = {2021 International Conference on Visual Communications and Image Processing (VCIP)}, title = {{On the Impact of Viewing Distance on Perceived Video Quality}}, year = {2021}, month = {dec}, pages = {1--5}, publisher = {IEEE}, abstract = {Due to the growing importance of optimizing the quality and efficiency of video streaming delivery, accurate assessment of user-perceived video quality becomes increasingly important. However, due to the wide range of viewing distances encountered in real-world viewing settings, the perceived video quality can vary significantly in everyday viewing situations. In this paper, we investigate and quantify the influence of viewing distance on perceived video quality. A subjective experiment was conducted with full HD sequences at three different fixed viewing distances, with each video sequence being encoded at three different quality levels. Our study results confirm that the viewing distance has a significant influence on the quality assessment. In particular, they show that an increased viewing distance generally leads to increased perceived video quality, especially at low media encoding quality levels. In this context, we also provide an estimation of potential bitrate savings that knowledge of actual viewing distance would enable in practice. Since current objective video quality metrics do not systematically take into account viewing distance, we also analyze and quantify the influence of viewing distance on the correlation between objective and subjective metrics. Our results confirm the need for distance-aware objective metrics when the accurate prediction of perceived video quality in real-world environments is required.}, doi = {10.1109/vcip53242.2021.9675431}, keywords = {Measurement, Image coding, Visual communication, Video sequences, Estimation, Streaming media, Media, video streaming, QoE, viewing distance, subjective testing}, url = {https://ieeexplore.ieee.org/document/9675431} } @InProceedings{Amirpour2021a, author = {Hadi Amirpour and Hannaneh Barahouei Pasandi and Christian Timmerer and Mohammad Ghanbari}, booktitle = {2021 International Conference on Visual Communications and Image Processing (VCIP)}, title = {{Improving Per-title Encoding for HTTP Adaptive Streaming by Utilizing Video Super-resolution}}, year = {2021}, month = {dec}, pages = {1--5}, publisher = {IEEE}, abstract = {In per-title encoding, to optimize a bitrate ladder over spatial resolution, each video segment is downscaled to a set of spatial resolutions, and they are all encoded at a given set of bitrates. To find the highest quality resolution for each bitrate, the low-resolution encoded videos are upscaled to the original resolution, and a convex hull is formed based on the scaled qualities. Deep learning-based video super-resolution (VSR) approaches show a significant gain over traditional upscaling approaches, and they are becoming more and more efficient over time. 
This paper improves the per-title encoding over the upscaling methods by using deep neural network-based VSR algorithms. Utilizing a VSR algorithm by improving the quality of low-resolution encodings can improve the convex hull. As a result, it will lead to an improved bitrate ladder. To avoid bandwidth wastage at perceptually lossless bitrates, a maximum threshold for the quality is set, and encodings beyond it are eliminated from the bitrate ladder. Similarly, a minimum threshold is set to avoid low-quality video delivery. The encodings between the maximum and minimum thresholds are selected based on one Just Noticeable Difference. Our experimental results show that the proposed per-title encoding results in a 24% bitrate reduction and 53% storage reduction compared to the state-of-the-art method.}, doi = {10.1109/vcip53242.2021.9675403}, keywords = {Image coding, Visual communication, Bit rate, Superresolution, Bandwidth, Streaming media, Spatial resolution, HAS, per-title, deep learning, compression, bitrate ladder}, url = {https://ieeexplore.ieee.org/document/9675403} } @InProceedings{AguilarArmijo2021a, author = {Jesus Aguilar-Armijo}, booktitle = {Proceedings of the 12th ACM Multimedia Systems Conference}, title = {{Multi-access Edge Computing for Adaptive Bitrate Video Streaming}}, year = {2021}, month = {jun}, pages = {378--382}, publisher = {ACM}, abstract = {Video streaming is the most used service in mobile networks and its usage will continue growing in the upcoming years. Due to this increase, content delivery should be improved as a key aspect of video streaming service, supporting higher bandwidth demand while assuring high quality of experience (QoE) for all the users. Multi-access edge computing (MEC) is an emerging paradigm that brings computational power and storage closer to the user. It is seen in the industry as a key technology for 5G mobile networks, with the goals of reducing latency, ensuring highly efficient network operation, improving service delivery and offering an improved user experience, among others. In this doctoral study, we aim to leverage the possibilities of MEC to improve the content delivery of video streaming services. We present four main research questions to target the different challenges in content delivery for HTTP Adaptive Streaming.}, doi = {10.1145/3458305.3478460}, url = {https://dl.acm.org/doi/10.1145/3458305.3478460} } @InProceedings{AguilarArmijo2021, author = {Jesus Aguilar-Armijo and Christian Timmerer and Hellwagner, Hermann}, booktitle = {2021 IEEE 46th Conference on Local Computer Networks (LCN)}, title = {{EADAS: Edge Assisted Adaptation Scheme for HTTP Adaptive Streaming}}, year = {2021}, month = {oct}, pages = {487--494}, publisher = {IEEE}, abstract = {Mobile networks equipped with edge computing nodes enable access to information that can be leveraged to assist client-based adaptive bitrate (ABR) algorithms in making better adaptation decisions to improve both Quality of Experience (QoE) and fairness. For this purpose, we propose a novel on-the-fly edge mechanism, named EADAS (Edge Assisted Adaptation Scheme for HTTP Adaptive Streaming), located at the edge node that assists and improves the ABR decisions on-the-fly. EADAS proposes (i) an edge ABR algorithm to improve QoE and fairness for clients and (ii) a segment prefetching scheme. 
The results show a QoE increase of 4.6%, 23.5%, and 24.4% and a fairness increase of 11%, 3.4%, and 5.8% when using a buffer-based, a throughput-based, and a hybrid ABR algorithm, respectively, at the client compared with client-based algorithms without EADAS. Moreover, QoE and fairness among clients can be prioritized using parameters of the EADAS algorithm according to service providers’ requirements.}, doi = {10.1109/lcn52139.2021.9524883}, keywords = {Edge Computing, HTTP Adaptive Streaming, Network-assisted Video Streaming, Quality of Experience}, url = {https://ieeexplore.ieee.org/document/9524883} } @Article{Abdullah2021, author = {Fatima Abdullah and Dragi Kimovski and Radu Prodan and Kashif Munir}, journal = {Computing}, title = {{Handover authentication latency reduction using mobile edge computing and mobility patterns}}, year = {2021}, issn = {1436-5057}, month = {jun}, pages = {1--20}, abstract = {With the advancement in technology and the exponential growth of mobile devices, network traffic has increased manifold in cellular networks. For this reason, latency reduction has become a challenging issue for mobile devices. In order to achieve seamless connectivity and minimal disruption during movement, latency reduction is crucial in the handover authentication process. Handover authentication is a process in which the legitimacy of a mobile node is checked when it crosses the boundary of an access network. This paper proposes an efficient technique that utilizes mobility patterns of the mobile node and a mobile Edge computing framework to reduce handover authentication latency. The key idea of the proposed technique is to categorize mobile nodes on the basis of their mobility patterns. We perform simulations to measure the networking latency. In addition, we use a queuing model to measure the processing time of an authentication query at an Edge server. The results show that the proposed approach reduces the handover authentication latency by up to 54% in comparison with the existing approach.}, doi = {10.1007/s00607-021-00969-z}, keywords = {Mobile edge computing, Handover authentication, Mobility patterns}, publisher = {Springer Science and Business Media (LLC)}, url = {https://link.springer.com/article/10.1007/s00607-021-00969-z} }