% Prateek Agrawal % Encoding: utf-8 @InProceedings{Zabrovskiy2021c, author = {Anatoliy Zabrovskiy and Prateek Agrawal and Christian Timmerer and Radu Prodan}, booktitle = {2021 30th Conference of Open Innovations Association (FRUCT)}, title = {{FAUST: Fast Per-Scene Encoding Using Entropy-Based Scene Detection and Machine Learning}}, year = {2021}, month = {oct}, pages = {292--302}, publisher = {IEEE}, abstract = {HTTP adaptive video streaming is a widespread and sought-after technology on the Internet that allows clients to dynamically switch between different stream qualities presented in the bitrate ladder to optimize overall received video quality. Currently, there exist several approaches of different complexity for building such a ladder. The simplest method is to use a static bitrate ladder, and the more complex one is to compute a per-title encoding ladder. The main drawback of these approaches is that they do not provide bitrate ladders for scenes with different visual complexity within the video. Moreover, most modern methods require additional computationally-intensive test encodings of the entire video to construct the convex hull, used to calculate the bitrate ladder. This paper proposes a new fast per-scene encoding approach called FAUST based on 1) quick entropy-based scene detection and 2) prediction of optimized bitrate ladder for each scene using an artificial neural network. The results show that our model reduces the mean absolute error to 0.15, the mean square error to 0.08, and the bitrate to 13.5 % while increasing the difference in video multimethod assessment fusion to 5.6 points.}, doi = {10.23919/fruct53335.2021.9599963}, keywords = {Visualization, Technological innovation, Bit rate, Switches, Mean square error methods, Streaming media, Encoding}, url = {https://ieeexplore.ieee.org/document/9599963} } @Article{Verma2021, author = {Pawan Kumar Verma and Prateek Agrawal and Ivone Amorim and Radu Prodan}, journal = {IEEE Transactions on Computational Social Systems}, title = {{WELFake: Word Embedding Over Linguistic Features for Fake News Detection}}, year = {2021}, issn = {2329-924X}, month = {aug}, number = {4}, pages = {881--893}, volume = {8}, abstract = {Social media is a popular medium for the dissemination of real-time news all over the world. Easy and quick information proliferation is one of the reasons for its popularity. An extensive number of users with different age groups, gender, and societal beliefs are engaged in social media websites. Despite these favorable aspects, a significant disadvantage comes in the form of fake news, as people usually read and share information without caring about its genuineness. Therefore, it is imperative to research methods for the authentication of news. To address this issue, this article proposes a two-phase benchmark model named WELFake based on word embedding (WE) over linguistic features for fake news detection using machine learning classification. The first phase preprocesses the data set and validates the veracity of news content by using linguistic features. The second phase merges the linguistic feature sets with WE and applies voting classification. To validate its approach, this article also carefully designs a novel WELFake data set with approximately 72,000 articles, which incorporates different data sets to generate an unbiased classification output. 
Experimental results show that the WELFake model categorizes the news in real and fake with a 96.73% accuracy, which improves the overall accuracy by 1.31% compared to bidirectional encoder representations from transformer (BERT) and 4.25% compared to convolutional neural network (CNN) models. Our frequency-based model, which focuses on analyzing writing patterns, outperforms predictive-based related works implemented using the Word2vec WE method by up to 1.73%.}, doi = {10.1109/tcss.2021.3068519}, keywords = {Bidirectional encoder representations from transformer (BERT), convolutional neural network (CNN), fake news, linguistic feature, machine learning (ML), text classification, voting classifier, word embedding (WE)}, publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, url = {https://ieeexplore.ieee.org/document/9395133} } @InCollection{Prodan2021, author = {Shajulin Benedict and Prateek Agrawal and Radu Prodan}, booktitle = {Communications in Computer and Information Science}, publisher = {Springer Singapore}, title = {{Energy Consumption Analysis of R-Based Machine Learning Algorithms for Pandemic Predictions}}, year = {2021}, month = jun, pages = {192--204}, volume = {1393}, abstract = {The push for agile pandemic analytic solutions has attained development-stage software modules of applications instead of functioning as full-fledged production-stage applications – i.e., performance, scalability, and energy-related concerns are not optimized for the underlying computing domains. And while research continues to support the idea that reducing the energy consumption of algorithms improves the lifetime of battery-operated machines, and energy analysis tools are advisable in almost any developer setting, an energy analysis report for R-based analytic programs is indeed a valuable addition. This article proposes an energy analysis framework for R-programs that enables data analytic developers, including pandemic-related application developers, to analyze the programs. It reveals an energy analysis report for R programs written to predict the new cases of 215 countries using random forest variants. Experiments were carried out at the IoT cloud research lab and the energy efficiency aspects are discussed in the article. In the experiments, the ranger-based prediction program consumed 95.8 J.}, doi = {10.1007/978-981-16-3660-8_18}, keywords = {Analysis, Energy consumption, Machine learning, R-program, Tools}, url = {https://link.springer.com/chapter/10.1007/978-981-16-3660-8_18} } @Article{Madaan2021, author = {Vishu Madaan and Aditya Roy and Charu Gupta and Prateek Agrawal and Anand Sharma and Cristian Bologa and Radu Prodan}, journal = {New Generation Computing}, title = {{XCOVNet: Chest X-ray Image Classification for COVID-19 Early Detection Using Convolutional Neural Networks}}, year = {2021}, issn = {1882-7055}, month = {feb}, pages = {1--15}, abstract = {The COVID-19 (also known as SARS-COV-2) pandemic has spread across the entire world. It is a contagious disease that easily spreads from one person in direct contact to another, classified by experts into five categories: asymptomatic, mild, moderate, severe, and critical. More than 66 million people have already been infected worldwide, with more than 22 million active patients as of 5 December 2020, and the rate is accelerating. More than 1.5 million patients (approximately 2.5% of total reported cases) across the world lost their lives. 
In many places, COVID-19 detection takes place through reverse transcription polymerase chain reaction (RT-PCR) tests, which may take longer than 48 h. This is one major reason for its severity and rapid spread. We propose in this paper a two-phase X-ray image classification model called XCOVNet for early COVID-19 detection using convolutional neural networks. XCOVNet detects COVID-19 infections in chest X-ray patient images in two phases. The first phase pre-processes a dataset of 392 chest X-ray images, of which half are COVID-19 positive and half are negative. The second phase trains and tunes the neural network model to achieve a 98.44% accuracy in patient classification.}, doi = {10.1007/s00354-021-00121-7}, keywords = {Coronavirus, SARS-COV-2, COVID-19 disease diagnosis, Machine learning, Image classification}, publisher = {Springer Science and Business Media LLC}, url = {https://link.springer.com/article/10.1007/s00354-021-00121-7} } @InProceedings{Zabrovskiy2020, author = {Anatoliy Zabrovskiy and Prateek Agrawal and Roland Matha and Christian Timmerer and Radu Prodan}, booktitle = {2020 IEEE Sixth International Conference on Multimedia Big Data (BigMM)}, title = {{ComplexCTTP: Complexity Class Based Transcoding Time Prediction for Video Sequences Using Artificial Neural Network}}, year = {2020}, month = sep, pages = {316--325}, publisher = {{IEEE}}, abstract = {HTTP Adaptive Streaming of video content is becoming an integral part of the Internet and accounts for the majority of today’s traffic. Although Internet bandwidth is constantly increasing, video compression technology plays an important role and the major challenge is to select and set up multiple video codecs, each with hundreds of transcoding parameters. Additionally, the transcoding speed depends directly on the selected transcoding parameters and the infrastructure used. Predicting transcoding time for multiple transcoding parameters with different codecs and processing units is a challenging task, as it depends on many factors. This paper provides a novel and considerably fast method for transcoding time prediction using video content classification and neural network prediction. Our artificial neural network (ANN) model predicts the transcoding times of video segments for state-of-the-art video codecs based on transcoding parameters and content complexity. We evaluated our method for two video codecs/implementations (AVC/x264 and HEVC/x265) as part of large-scale HTTP Adaptive Streaming services. The ANN model of our method is able to predict the transcoding time by minimizing the mean absolute error (MAE) to 1.37 and 2.67 for the x264 and x265 codecs, respectively. For x264, this is an improvement of 22% compared to the state of the art.}, doi = {10.1109/bigmm50055.2020.00056}, keywords = {Transcoding time prediction, adaptive streaming, video transcoding, neural networks, video encoding, video complexity class, HTTP adaptive streaming, MPEG-DASH}, url = {https://ieeexplore.ieee.org/document/9232616} } @InCollection{Verma2020, author = {Pawan Kumar Verma and Prateek Agrawal}, booktitle = {Data Management, Analytics and Innovation}, publisher = {Springer Singapore}, title = {{Study and Detection of Fake News: P2C2-Based Machine Learning Approach}}, year = {2020}, month = {sep}, pages = {261--278}, abstract = {News is the most important and sensitive piece of information affecting society nowadays. 
In the current scenario, there are two ways to propagate news all over the world: the first is the traditional way, i.e., newspapers, and the second is electronic media like social media websites. Electronic media is the most popular medium these days because it helps to propagate news to a huge audience in a few seconds. Besides these benefits of electronic media, it also has one disadvantage, i.e., “spreading the Fake News”. Fake news is the most common problem these days. Even big companies like Twitter, Facebook, etc. are facing fake news problems. Several researchers are working in these big companies to solve this problem. Fake news can be defined as a news story that is not true. In more specific words, we can say that news is fake if any news agency deliberately writes a piece of news as false and it is also verifiably false. This paper focuses on some key characteristics of fake news and how it is affecting society nowadays. It also includes various key viewpoints which are useful for categorizing whether news is fake or not. Finally, this paper discusses some key challenges and future directions that help in increasing the accuracy of fake news detection on the basis of the P2C2 (Propagation, Pattern, Comprehension & Credibility) approach, which has two phases: Detection and Verification. This paper helps readers in two ways: (i) newcomers can easily get basic knowledge of fake news and its impact; (ii) they can learn about the different perspectives of fake news which are helpful in the detection process.}, doi = {10.1007/978-981-15-5619-7_18}, keywords = {Credibility-based content classification, Comprehension content study on social media}, url = {https://link.springer.com/chapter/10.1007/978-981-15-5619-7_18} } @Article{Torre2020, author = {Ennio Torre and Juan J. Durillo and Vincenzo de Maio and Prateek Agrawal and Shajulin Benedict and Nishant Saurabh and Radu Prodan}, journal = {Information and Software Technology}, title = {{A dynamic evolutionary multi-objective virtual machine placement heuristic for cloud data centers}}, year = {2020}, issn = {0950-5849}, month = {dec}, pages = {106390}, volume = {128}, abstract = {Minimizing the resource wastage reduces the energy cost of operating a data center, but may also lead to a considerably high resource overcommitment affecting the Quality of Service (QoS) of the running applications. The effective tradeoff between resource wastage and overcommitment is a challenging task in virtualized Clouds and depends on the allocation of virtual machines (VMs) to physical resources. We propose in this paper a multi-objective method for dynamic VM placement, which exploits live migration mechanisms to simultaneously optimize the resource wastage, overcommitment ratio and migration energy. Our optimization algorithm uses a novel evolutionary meta-heuristic based on an island population model to approximate the Pareto optimal set of VM placements with good accuracy and diversity. 
Simulation results using traces collected from a real Google cluster demonstrate that our method outperforms related approaches by reducing the migration energy by up to 57% with a QoS increase below 6%.}, doi = {10.1016/j.infsof.2020.106390}, keywords = {VM placement, Multi-objective optimisation, Resource overcommitment, Resource wastage, Live migration, Energy consumption, Pareto optimal set, Genetic algorithm, Data center simulation}, publisher = {Elsevier BV}, url = {https://www.sciencedirect.com/science/article/pii/S0950584919302101} } @InCollection{Limbasiya2020, author = {Nivid Limbasiya and Prateek Agrawal}, booktitle = {Algorithms for Intelligent Systems}, publisher = {Springer Singapore}, title = {{Bidirectional Long Short-Term Memory-Based Spatio-Temporal in Community Question Answering}}, year = {2020}, month = jan, pages = {291--310}, abstract = {Community-based question answering (CQA) is an online-based crowdsourcing service that enables users to share and exchange information in the field of natural language processing. A major challenge of a CQA service is to determine the high-quality answer with respect to the given question. The existing methods perform semantic matches between a single pair of a question and its relevant answer. In this paper, a Spatio-Temporal bidirectional Long Short-Term Memory (ST-BiLSTM) method is proposed to predict the semantic representation between question–answer and answer–answer pairs. ST-BiLSTM has two LSTM networks instead of one (i.e., a forward and a backward LSTM). The forward LSTM controls the spatial relationship and the backward LSTM examines the temporal interactions for accurate answer prediction. Hence, it captures both the past and future context by using two networks for accurate answer prediction based on the user query. Initially, preprocessing is carried out by named-entity recognition (NER), dependency parsing, tokenization, part of speech (POS) tagging, lemmatization, stemming, syntactic parsing, and stop word removal techniques to filter out the useless information. Then, par2vec is applied to transform the distributed representation of question and answer into a fixed vector representation. Next, the ST-BiLSTM cell learns the semantic relationship between question–answer and answer–answer pairs to determine the relevant answer set for the given user question. The experiment performed on the SemEval 2016 and Baidu Zhidao datasets shows that our proposed method outperforms other state-of-the-art approaches.}, doi = {10.1007/978-981-15-1216-2_11}, keywords = {Answer quality prediction, BiLSTM, Community question answering, Deep learning, Par2vec, Spatio-Temporal}, url = {https://link.springer.com/chapter/10.1007/978-981-15-1216-2_11} } @InProceedings{Kashansky2020, author = {Vladislav Kashansky and Dragi Kimovski and Radu Prodan and Prateek Agrawal and Fabrizio Marozzo and Gabriel Iuhasz and Marek Marozzo and Javier Garcia-Blas}, booktitle = {2020 28th Euromicro International Conference on Parallel, Distributed and Network-Based Processing (PDP)}, title = {{M3AT: Monitoring Agents Assignment Model for Data-Intensive Applications}}, year = {2020}, month = {mar}, pages = {72--79}, publisher = {IEEE}, abstract = {Nowadays, massive amounts of data are acquired, transferred, and analyzed nearly in real-time by utilizing a large number of computing and storage elements interconnected through high-speed communication networks. 
However, one issue that still requires research effort is to enable efficient monitoring of applications and infrastructures of such complex systems. In this paper, we introduce an Integer Linear Programming (ILP) model called M3AT for optimised assignment of monitoring agents and aggregators on large-scale computing systems. We identified a set of requirements from three representative data-intensive applications and exploited them to define the model’s input parameters. We evaluated the scalability of M3AT using the Constraint Integer Programming (SCIP) solver with its default configuration based on synthetic data sets. Preliminary results show that the model provides optimal assignments for systems composed of up to 200 monitoring agents while keeping the number of aggregators constant, and demonstrates variable sensitivity with respect to the scale of monitoring data aggregators and the limitation policies imposed.}, doi = {10.1109/pdp50117.2020.00018}, keywords = {Monitoring systems, high performance computing, aggregation, systems control, data-intensive systems, generalized assignment problem, SCIP optimization suite}, url = {https://ieeexplore.ieee.org/document/9092397} } @Article{Bhadwal2020, author = {Neha Bhadwal and Prateek Agrawal and Vishu Madaan}, journal = {Scalable Computing: Practice and Experience}, title = {{A Machine Translation System from Hindi to Sanskrit Language using Rule based Approach}}, year = {2020}, issn = {1895-1767}, month = {aug}, number = {3}, pages = {543--554}, volume = {21}, abstract = {Machine Translation is an area of Natural Language Processing which can replace the laborious task of manual translation. Sanskrit is among the ancient Indo-Aryan languages. There are numerous works of art and literature in Sanskrit. It has also been a medium for creating treatises of philosophical work as well as works on logic, astronomy and mathematics. On the other hand, Hindi is the most prominent language of India. Moreover, it is among the most widely spoken languages across the world. This paper is an effort to bridge the language barrier between the Hindi and Sanskrit languages such that any text in Hindi can be translated to Sanskrit. The technique used for achieving the aforesaid objective is rule-based machine translation. The salient linguistic features of the two languages are used to perform the translation. The results are produced in the form of two confusion matrices wherein a total of 50 random sentences and 100 tokens (Hindi words or phrases) were taken for system evaluation. The semantic evaluation of 100 tokens produces an accuracy of 94%, while the pragmatic analysis of 50 sentences produces an accuracy of around 86%. Hence, the proposed system can be used to understand the whole translation process and can further be employed as a tool for learning as well as teaching. 
Further, this application can be embedded in local communication-based assistive Internet of Things (IoT) devices like Alexa or Google Assistant.}, doi = {10.12694/scpe.v21i3.1783}, keywords = {Rule based approach, Natural Language Translation, Parts of speech tagging, Sanskrit Translation, Hindi Translation}, publisher = {Scalable Computing: Practice and Experience}, url = {https://www.scpe.org/index.php/scpe/article/view/1783} } @Article{Agrawal2020a, author = {Prateek Agrawal and Deepak Chaudhary and Vishu Madaan and Anatoliy Zabrovskiy and Radu Prodan and Dragi Kimovski and Christian Timmerer}, journal = {Multimedia Tools and Applications}, title = {{Automated bank cheque verification using image processing and deep learning methods}}, year = {2020}, issn = {1573-7721}, month = {oct}, number = {4}, pages = {5319--5350}, volume = {80}, abstract = {Automated bank cheque verification using image processing is an attempt to complement the present cheque truncation system, as well as to provide an alternate methodology for the processing of bank cheques with minimal human intervention. When it comes to the clearance of bank cheques and monetary transactions, this should not only be reliable and robust but also save time, which is one of the major factors for countries having a large population. In order to perform the task of cheque verification, we developed a tool which acquires the key components of a cheque leaflet essential for the task of cheque clearance, using image processing and deep learning methods. These components include the bank branch code, cheque number, legal as well as courtesy amount, account number, and signature patterns. Our innovation aims at benefiting the banking system by re-innovating a competent cheque-based monetary transaction system which requires automated system intervention. For this research, we used the Institute of Development and Research in Banking Technology (IDRBT) cheque dataset and deep learning based convolutional neural networks (CNN), which gave us an accuracy of 99.14% for handwritten numeric character recognition. This resulted in improved accuracy and precise assessment of the handwritten components of the bank cheque. For machine-printed script, we used the MATLAB in-built OCR method and the accuracy achieved is satisfactory (97.7%). For signature verification, we used the Scale Invariant Feature Transform (SIFT) for feature extraction and a Support Vector Machine (SVM) as classifier; the accuracy achieved for signature verification is 98.10%.}, doi = {10.1007/s11042-020-09818-1}, keywords = {Cheque truncation system, Image segmentation, Bank cheque clearance, Image feature extraction, Convolution neural network, Support vector machine, Scale invariant feature transform}, publisher = {Springer Science and Business Media LLC}, url = {https://link.springer.com/article/10.1007/s11042-020-09818-1} } @Article{Agrawal2020, author = {Prateek Agrawal and Anatoliy Zabrovskiy and Adithyan Ilangovan and Christian Timmerer and Radu Prodan}, journal = {Cluster Computing}, title = {{FastTTPS: fast approach for video transcoding time prediction and scheduling for HTTP adaptive streaming videos}}, year = {2020}, issn = {1573-7543}, month = {nov}, pages = {1--17}, abstract = {HTTP adaptive streaming of video content has become an integral part of the Internet and dominates other streaming protocols and solutions. 
The duration of creating video content for adaptive streaming ranges from seconds up to several hours or days, due to the plethora of video transcoding parameters and video source types. Although the computing resources of different transcoding platforms and services constantly increase, accurate and fast transcoding time prediction and scheduling are still crucial. We propose in this paper a novel method called fast video transcoding time prediction and scheduling (FastTTPS) of x264 encoded videos based on three phases: (i) transcoding data engineering, (ii) transcoding time prediction, and (iii) transcoding scheduling. The first phase is responsible for video sequence selection, segmentation and feature data collection required for predicting the transcoding time. The second phase develops an artificial neural network (ANN) model for segment transcoding time prediction based on transcoding parameters and derived video complexity features. The third phase compares a number of parallel schedulers to map the predicted transcoding segments on the underlying high-performance computing resources. Experimental results show that our predictive ANN model minimizes the transcoding mean absolute error (MAE) and mean square error (MSE) by up to 1.7 and 26.8, respectively. In terms of scheduling, our method reduces the transcoding time by up to 38% using a Max–Min algorithm compared to the actual transcoding time without prediction information.}, doi = {10.1007/s10586-020-03207-x}, keywords = {Transcoding time prediction, Video transcoding, Scheduling, Artificial neural networks, MPEG-DASH, Adaptive streaming}, publisher = {Springer Science and Business Media LLC}, url = {https://link.springer.com/article/10.1007/s10586-020-03207-x} } @InProceedings{Madaan2019, author = {Vishu Madaan and Rupinder Kaur and Prateek Agrawal}, booktitle = {2019 4th International Conference on Information Systems and Computer Networks (ISCON)}, title = {{Rheumatoid Arthritis anticipation using Adaptive Neuro Fuzzy Inference System}}, year = {2019}, month = nov, pages = {340--346}, publisher = {IEEE}, abstract = {A state of discomfort is known as a disease, also termed illness or sickness. When the tiniest living things like viruses enter our body, they react with the cells of the body and cause an illness. Arthritis is very difficult to forecast early. It develops with age and is related to pain in the large and small joints. Rheumatoid Arthritis (RA) is a chronic, long-term auto-immune and inflammatory disease which damages many joint tissues. It occurs when the immune system cannot distinguish the body's own cells and tissues. The ANFIS model is used for the prediction of RA in humans. A complete process is described in this study, which provides a technique for the diagnosis of Rheumatoid Arthritis in human beings with an accuracy of 93.5%. This diagnosis is made on the basis of 12 symptoms of RA such as age, stiffness, joint deformity, ESR, CRP, WBC, uric acid, etc. This paper also compares ANFIS with Naive Bayes, Bagging, and KNN classifiers.}, doi = {10.1109/iscon47742.2019.9036297}, keywords = {Disease Diagnosis, Arthritis Symptoms, Arthritis Prediction, KNN Classifier, ANFIS, Naive Bayes Classification}, url = {https://ieeexplore.ieee.org/document/9036297} }