%%% GMU CS Technical Reports BibTeX Database %%% %%% This database comprises the joint technical reports of the Department of Computer Science %%% and the Department of Information and Software Engineering, which merged prior to 2008. %%% %%% Please report any errors to tr-admin@cs.gmu.edu %%% %%% The ISE Department has technical report numbers of the form ISE-TR-xx-yy, where xx is %%% a two-digit year and yy is a two-digit number, zero-padded. %%% %%% The CS Department has technical report numbers of the form GMU-CS-TR-xxxx-y*, where %%% xxxx is a four-digit year and y* is a number of any length, not zero-padded. @TechReport{ GMU-CS-TR-2003-0, author = { Sean Luke and Jana Kosecka }, title = { How to Make a Technical Report }, institution = { Department of Computer Science, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { GMU-CS-TR-2003-0 }, note = { This document has been superceded by GMU CS Department Technical Report GMU-CS-TR-2008-0, ``How To Make a Technical Report (Rev. 2)'', by Sean Luke, available at http://cs.gmu.edu }, abstract = { The following document describes the preferred formatting of technical reports and submission guidelines. This document itself is formatted according to the preferred formatting rules. }, } @TechReport{ GMU-CS-TR-2003-1, author = { Liviu Panait and Sean Luke }, title = { Cooperative Multi-Agent Learning: The State of the Art }, institution = { Department of Computer Science, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { GMU-CS-TR-2003-1 }, abstract = { Cooperative multi-agent systems problems are ones in which several agents attempt, through their interaction, to jointly solve tasks or to maximize their utility. Due to the interactions among the agents, multi-agent problem complexity can rise rapidly with the number of agents or their behavioral sophistication. The challenge this presents to the task of programming solutions to such problems has spawned increasing interest in machine learning (ML) techniques to automate the search and optimization process. We provide a broad survey of the cooperative multiagent learning literature. Previous surveys of this area have largely focused on issues common to specific subareas (for example, reinforcement learning or robotics). In this survey we attempt to draw from multi-agent learning work in a spectrum of areas, including reinforcement learning, evolutionary computation, game theory, complex systems, agent modeling, and robotics. We find that this broad view leads to a division of the work into two categories, each with its own special issues: applying a single learner to discover joint solutions to multi-agent problems (team learning), or using multiple simultaneous learners, often one per agent (concurrent learning). Additionally, we discuss two important topics independent of these categories: problem decomposition and communication. We conclude with a presentation of multi-agent learning problem domains, a discussion of certain challenge topics (scalability and adaptive dynamics), and a list of multi-agent learning resources. }, } @TechReport{ GMU-CS-TR-2004-1, author = { Sean Luke and Keith Sullivan and Gabriel Catalin Balan and Liviu Panait }, title = { Tunably Decentralized Algorithms for Cooperative Target Observation }, institution = { Department of Computer Science, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2004 }, howpublished = { Available at http://cs.gmu.edu }, number = { GMU-CS-TR-2004-1 }, abstract = { Multi-agent problem domains may require distributed algorithms for a variety of reasons: local sensors, limitations of communication, and availability of distributed computational resources. In the absence of these constraints, centralized algorithms are often more efficient, simply because they are able to take advantage of more information. We introduce a variant of the cooperative target observation domain which is free of such constraints. We propose two algorithms, inspired by K-means clustering and hill-climbing respectively, which are scalable in degree of decentralization. Neither algorithm consistenly outperforms the other across over all problem domain settings. Surprisingly, we find that hill-climbing is sensitive to degree of decentralization, while K-means is not. We also experiment with a combination of the two algorithms which draws strength from each. }, } @TechReport{ GMU-CS-TR-2004-2, author = { Jana Kosecka and Xialong Yang }, title = { Location Recognition, Global Localization and Relative Positioning Based on Scale-Invariant Keypoints}, institution = { Department of Computer Science, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2004 }, howpublished = { Available at http://cs.gmu.edu }, number = { GMU-CS-TR-2004-2 }, abstract = { The localization capability of a mobile robot is central to basic navigation and map building tasks. We describe a probabilistic environment model which facilitates global localization scheme by means of location recognition. In the exploration stage the environment is partitioned into a class of locations, each characterized by a set of scale-invariant keypoints. The descriptors associated with these keypoints can be robustly matched despite changes in contrast, scale and viewpoint. We demonstrate the efficacy of these features for location recognition, where given a new view the most likely location from which this view came is determined. The misclassifications due to dynamic changes in the environment or inherent appearance ambiguities are overcome by exploiting neighborhood relationships captured by a Hidden Markov Model. We report the recognition performance of this approach on an indoor environment consisting of eighteen locations and discuss the suitability of this approach for a more general class of recognition problems. Once the most likely location has been determined we show how to compute the relative pose between the representative view and the current view. }, } @TechReport{ GMU-CS-TR-2004-3, author = { Wei Zhang and Jana Kosecka }, title = { Experiments in Building Recognition}, institution = { Department of Computer Science, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2004 }, howpublished = { Available at http://cs.gmu.edu }, number = { GMU-CS-TR-2004-3 }, abstract = { In this report we study the problem of building recognition. Given a database of building images, we are interested in classifying a novel view of a building by finding the closest match from the database. We examine the efficacy of local scale-invariant features and their associated descriptors as representations of building appearance and use a simple voting scheme in the recognition phase. }, } @TechReport{ GMU-CS-TR-2004-4, author = { Jeff Lei and Richard Carver }, title = { A New Algorithm for Reachability Testing of Concurrent Programs }, institution = { Department of Computer Science, George Mason University }, address = { 4400 University Drive, MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2004 }, note = { Superceded by GMU-CS-TR-2005-1 }, number = { GMU-CS-TR-2004-4 }, abstract = { Concurrent programs exhibit non-deterministic behavior, which makes them difficult to test. One approach to testing concurrent programs, called reachability testing, generates test sequences automatically, and on-the-fly, without constructing any static models. This approach guarantees that every partially-ordered synchronization sequence of a program with a given input will be exercised exactly once. Unfortunately, in order to make this guarantee all existing reachability testing algorithms need to save and search through the history of test sequences that have already been exercised, which is impractical for many applications. In this paper, we present a new reachability testing algorithm that does not save the history of test sequences. This new algorithm guarantees that every partially-ordered synchronization sequence is exercised at least once for an arbitrary program and exactly once for a program that satisfies certain conditions. We also describe a reachability testing tool called RichTest. Our empirical studies with RichTest indicate that our new algorithm exercises every synchronization sequence exactly once for many applications. }, } @TechReport{ GMU-CS-TR-2005-1, author = { Jeff Lei and Richard Carver }, title = { Reachability Testing of Concurrent Programs }, institution = { Dept. of Computer Science, George Mason University }, address = { 4400 University Dr., MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available at http://cs.gmu.edu }, number = { GMU-CS-TR-2005-1 }, abstract = { One approach to testing concurrent programs, called reachability testing, generates synchronization sequences automatically, and on-the-fly, without constructing any static models. In this paper, we present a general execution model for concurrent programs that allows reachability testing to be applied to several commonly used synchronization constructs. We also present a new method for performing reachability testing. This new method guarantees that every partially-ordered synchronization sequence will be exercised exactly once without having to save any sequences that have already been exercised. We describe a prototype reachability testing tool called RichTest and report some empirical results, including a comparison between RichTest and a partial order reduction based tool called VeriSoft. RichTest performed significantly better for the programs in our study. Index Terms—Software Testing, Reachability Testing, Concurrent Programming }, } @TechReport{ GMU-CS-TR-2005-2, author = { Fayin Li and Jana Kosecka }, title = { Probabilistic Location Recognition using Reduced Feature Set }, institution = { Dept. of Computer Science, George Mason University }, address = { 4400 University Dr., MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available at http://cs.gmu.edu }, number = { GMU-CS-TR-2005-2 }, abstract = { The localization capability is central to basic navigation tasks and motivates development of various visual navigation systems. These systems can be used both as navigational aids for visually impaired or in the context of autonomous mobile systems. In this paper we describe a two stage approach for localization in indoor environments. In the first stage, the environment is partitioned into several locations, each characterized by a set of scale-invariant keypoints and their associated descriptors. In the second stage the keypoints of the query view are integrated probabilistically yielding an estimate of most likely location. The emphasis of our approach in the environment model acquisition stage is on the selection of discriminative features, best suited for characterizing individual locations. The high recognition rate is maintained with only 10% of the originally detected features, yielding a substantial speedup in recognition. The ambiguities due to the self-similarity and dynamic changes in the environment are resolved by exploiting spatial relationships between locations captured by Hidden Markov Model. Once the most likely location is determined, the relative pose of the camera with respect to the reference view can be computed. }, } @TechReport{ GMU-CS-TR-2005-3, author = { Keith Sullivan and Liviu Panait and Gabriel Balan and Sean Luke }, title = { Can Good Learners Always Compensate for Poor Learners? }, institution = { Dept. of Computer Science, George Mason University }, address = { 4400 University Dr., MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available at http://cs.gmu.edu }, number = { GMU-CS-TR-2005-3 }, abstract = { Can a good learner compensate for a poor learner when paired in a coordination game? Previous work has given an example where a special learning algorithm (FMQ) is capable of doing just that when paired with a specific capable algorithm even in games which stump the poorer algorithm when paired with itself. In this paper, we argue that this result is not general. We give a straightforward extension to the coordination game in which FMQ cannot compensate for the lesser algorithm. We also provide other problematic pairings, and argue that another highquality algorithm cannot do so either. }, } @TechReport{ GMU-CS-TR-2005-4, author = { Liviu Panait and Keith Sullivan and Sean Luke }, title = { Lenience Towards Teammates Helps in Cooperative Multiagent Learning }, institution = { Dept. of Computer Science, George Mason University }, address = { 4400 University Dr., MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available at http://cs.gmu.edu }, number = { GMU-CS-TR-2005-4 }, abstract = { Concurrent learning is a form of cooperative multiagent learning in which each agent has an independent learning process and little or no control over its teammates’ actions. In such learning algorithms, an agent’s perception of the joint search space depends on the reward received by both agents, which in turn depends on the actions currently chosen by the other agents. The agents will tend to converge towards certain areas of the space because of their learning processes. As a result, an agent’s perception of the search space may benefit if computed over multiple rewards at early stages of learning, but additional rewards have little impact towards the end. We thus suggest that agents should be lenient with their teammates: ignore many of the low rewards initially, and fewer rewards as learning progresses. We demonstrate the benefit of lenience in a cooperative co-evolution algorithm and in a new reinforcement learning algorithm. }, } @TechReport{ GMU-CS-TR-2005-5, author = { Liviu Panait and Sean Luke }, title = { Selecting Informative Actions Improves Cooperative Multiagent Learning }, institution = { Dept. of Computer Science, George Mason University }, address = { 4400 University Dr., MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available at http://cs.gmu.edu }, number = { GMU-CS-TR-2005-5 }, abstract = { In concurrent cooperative multiagent learning, each agent in a team simultaneously learns to improve the overall performance of the team, with no direct control over the actions chosen by its teammates. An agent’s action selection directly influences the rewards received by all the agents; this results in a co-adaptation among the concurrent learning processes. Co-adaptation can drive the team towards suboptimal solutions because agents tend to select those actions that are rewarded better, without any consideration for how such actions may affect the search of their teammates. We argue that to counter this tendency, agents should also prefer actions that inform their teammates about the structure of the joint search space in order to help them choose from among various action options. We analyze this approach in a cooperative coevolutionary framework, and we propose a new algorithm, oCCEA, that highlights the advantages of selecting informative actions. We show that oCCEA generally outperforms other cooperative coevolution algorithms on our test problems. }, } @TechReport{ GMU-CS-TR-2005-6, author = { Wei Zhang and Jana Kosecka }, title = { Nonparametric estimation of multiple structurers with outliers }, institution = { Department of Computer Science, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available upon request }, number = { GMU-CS-TR-2005-6 }, abstract = { In many computer vision problems, several instances of a particular model need to be recovered from the noisy data. In such cases one is faced with the problem of simultaneous estimation of the number of models and their parameters. This problem becomes difficult as the measurement noise in the data increases and the data are further corrupted by outliers. This is especially the case in a variety of motion estimation problems, where the displacement between the views is large and the process of establishing correspondences is difficult. In this paper we propose a novel nonparametric sampling based method for solving this problem. The main novelty of the proposed method lies in the analysis of the distribution of residuals of individual data points with respect to the set of hypotheses, generated by a RANSAC-like sampling process. We will show that the modes of the residual distributions directly reveal the presence of multiple models and facilitate the recovery of the individual models, without making any assumptions about the distribution of the outliers or the noise process. The proposed approach is capable of handling data with large fraction of outliers. Experiments with both synthetic and real data are presented to demonstrate the effectiveness of the proposed approach. }, } @TechReport{ GMU-CS-TR-2005-7, author = { Wei Zhang and Jana Kosecka }, title = { A new inlier identification scheme for robust estimation problems }, institution = { Department of Computer Science, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available upon request }, number = { GMU-CS-TR-2005-7 }, abstract = { The data in vision problems, often heavily contaminated by outliers, call for efficient robust techniques to identify inliers for correct estimation. RANSAC algorithm is a frequently used robust estimator for computer vision problems. In traditional RANSAC scheme, when data contain significant fraction of outliers, large number of samples is needed in order to obtain at least one outlier free sample. In addition to that, each hypothesis generated from the samples is typically evaluated using all data points, which further lowers the efficiency. In this paper, we propose a novel hypothesis evaluation scheme, which enables efficient classification of the data points as outliers and inliers, while requiring a small fixed number of samples. The method is based on the observation that for each data point the properties of the distribution of the residuals with respect to the generated hypotheses reveal whether the point is an outlier or inlier. The problem of inlier/outlier identification can then be formulated as a classification problem. We demonstrate the proposed method on motion estimation problems with large fraction of outliers on both synthetic and real data. }, } @TechReport{ GMU-CS-TR-2005-8, author = { Elena Popovici }, title = { Evolving Families of Designs Using L-Systems }, institution = { Department of Computer Science, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available at http://cs.gmu.edu }, number = { GMU-CS-TR-2005-8 }, abstract = { Evolutionary computation has proven its utility in automating the process of engineering design. However, little attention has been paid to the scalability of generated designs, which is an important issue. This paper addresses this issue and proves the viability of evolving families of designs using parameterized L-Systems as a representation. The rest of the paper is organized as follows: first, an introduction as to why scalability is important and difficult; second, a review of existing work on evolving L-Systems; the third section contains a description of the application domain used for this feasibility study, details on the L-Systems and the EA used; section four presents experiments conducted and their results; the paper ends with a discussion, drawing conclusions and setting goals for future work. }, } @TechReport{ ISE-TR-05-01, author = { Aybar C. Acar and Amihai Motro }, title = { Intensional Encapsulations of Database Subsets via Genetic Programming }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-05-01 }, abstract = { Finding intensional encapsulations of database subsets is the inverse of query evaluation. Whereas query evaluation transforms an intensional expression (the query) to its extension (a set of data values), intensional encapsulation assigns an intensional expression to a given set of data values. We describe a method for deriving intensional representations of subsets of records in large database tables. Our method is based on the paradigm of genetic programming. It is shown to achieve high accuracy and maintain compact expression size, while requiring cost that is acceptable to all applications, but those that require instantaneous results. Intensional encapsulation has a broad range of applications including cooperative answering, information integration, security and data mining. }, } @TechReport{ ISE-TR-05-02, author = { Amihai Motro and Francesco Parisi-Presicce }, title = { Blind Custodians: A Database Service Architecture that Supports Privacy without Encryption }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-05-02 }, abstract = { We describe an architecture for a database service that does not assume that the service provider can be trusted. Unlike other architectures that address this problem, this architecture, which we call blind custodians, does not rely on encryption. Instead, it offers confidentiality by means of information dissociation: The server only stores “fragments” of information that are considered safe (i.e., each fragment does not violate privacy), while the client stores the associations between the fragments that are necessary to reconstruct the information. We argue that this architecture allows satisfactory confidentiality, while offering two important advantages: (1) It does not restrict the types of queries that can be submitted by clients (as encryption-based methods invariably do), and (2) it requires only light processing at the client, assigning the bulk of the processing to the server (as befits a true service). Moreover, the architecture permits flexible control over the level of confidentiality that should be maintained (at the cost of additional overhead). }, } @TechReport{ ISE-TR-05-03, author = { Amihai Motro and Alessandro D'Atri }, title = { Optimal Manufacturing Trees }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-05-03 }, abstract = { We consider the process of manufacturing a product from a given number of elementary components. By assembling intermediate products, the target product can be manufactured in a variety of processes, each modeled by a tree. We are interested in manufacturing turnaround: the time between receiving an order at the root and its completion. We express the turnaround time of each manufacturing process (tree) with a formula that incorporates three parameters: the time required to create elementary components, the time required to assemble a product from its components and the time required to deliver the product to its procurer (another manufacturer). We show that this turnaround formula is optimized in a manufacturing process that corresponds to a perfect (or nearly perfect) tree. The degree of the optimal tree (i.e., the ideal number of components in each sub-assembly) is shown to be independent of the number of elementary components, suggesting that in each manufacturing environment there is an ideal assembly size, which is optimal for the manufacturing of products of any scale. }, } @TechReport{ ISE-TR-05-04, author = { Aynur Abdurazik and Jeff Offutt and Andrea Baldini }, title = { A Comparative Evaluation of Tests Generated from Different UML Diagrams: Diagrams and Data }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-05-04 }, abstract = { This paper presents a single project experiment on the fault revealing capabilities of model-based test sets. The tests are generated from UML statecharts and UML sequence diagrams. This experiment found that the statechart test sets did better at revealing unit level faults than the sequence diagram test sets, and the sequence diagram test sets did better at revealing integration level faults than the statechart test sets. The statecharts also resulted in more test cases than the sequence diagrams. The experiment showed that model-based testing can be used to systematically generate test data and indicates that different UML models can play different roles in testing. }, } @TechReport{ ISE-TR-05-05, author = { Alessandro D'Atri and Amihai Motro }, title = { VirtuE: a Formal Model of Virtual Enterprises for Information Markets }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-05-05 }, abstract = { A vital part of a modern economy is an information market. In this market, information products are being traded in countless ways. Information is bought, modified, integrated, incorporated into other products, and then sold again. Often, the manufacturing of an information product requires the collaboration of several participants. A virtual enterprise is a community of business entities that collaborate on the manufacturing of complex products. This collaboration is often ad hoc, for a specific product only, after which the virtual enterprise may dismantle. The virtual enterprise paradigm is particularly appealing for modeling collaborations for manufacturing information products, and in this paper we present a new model, called VirtuE, for modeling such activities. VirtuE has three principal components. First, it defines a distributed infrastructure with concepts such as members, products, inventories, and production plans. Second, it defines transactions among members, to enable collaborative production of complex products. Finally, it provides means for the instrumentation of enterprises, to measure their performance and to govern their behavior. }, } @TechReport{ ISE-TR-05-06, author = { Alexander Brodsky and Hadon Nash }, title = { CoJava: A Unified Language for Simulation and Optimization }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2005 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-05-06 }, abstract = { We have proposed and implemented the language CoJava, which offers both the advantages of simulation-like process modeling in Java, and the capabilities of true decision optimization. By design, the suntax of CoJava is identical to the programming language Java, extended with special constructs to (1) make a non-deterministic choice of a numeric value, (2) assert a constraint, and (3) designate a program variable as the objective to be optimized. The semantics of CoJava interprets a program as an optimal nondeterministic execution path, namely, a path that (1) satisfies the range conditions in the choice statements, (2) satisfies the assert-constraint statements, and (3) produces the optimal value in a designated program variable, among all execution paths that satisfy (1) and (2). To run a CoJava program amounts to first finding an optimal execution path, and then procedurally executing it. We have developed a CoJava constraint compiler based on a reduction of the problem of finding an optimal execution trace to a standard symbolic formulation, reinterpreting Java code as a symbolic constraint construction, and solving the resulting optimization problem on an external solver. To demonstrate the power of CoJava, we have implemented a realistic problem in the area of robot arm control in CoJava. The robot arm is constructed using self-contained components implemented as CoJava classes, that model robot's arm movements based on Newton's laws. }, } @TechReport{ ISE-TR-06-01, author = { Daniel Barbar{\'a} and Carlotta Domeniconi and James P. Rogers }, title = { Detecting outliers using transduction and statistical significance testing }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2006 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-06-01 }, abstract = { Finding points that are outliers with respect to a set of other points is an important task in data mining. Outlier detection can uncover important anomalies in fields like intrusion detection and fraud analysis. In data streaming, the presence of a large number of outliers indicates that the underlying process that is generating the data is undergoing significant changes and the models that attempt to characterize it need to be updated. Although there has been a significant amount of work in outlier detection, most of the algorithms in the literature resort to a particular definition of what an outlier is (e.g., density-based), and use thresholds to detect them. In this paper we present a novel technique to detect outliers that does not impose any particular definition for them. The test we propose aims to diagnose whether a given point is an outlier with respect to an existing clustering model (i.e., a set of points partitioned in groups). However, the test can also be successfully utilize to recognize outliers when the clustering information is not available. This test is based on Transductive Confidence Machines, which have been previously proposed as a mechanism to provide individual confidence measures on classification decisions. The test uses hypothesis testing to prove or disprove whether a point is fit to be in each of the clusters of the model. We demonstrate, experimentally, that the test is highly robust, and produces very few misdiagnosed points, even when no clustering information is available. We also show that the test can be succesfully applied to identify outliers present inside a data set for which no other information is available, thereby provinding the user with a clean data set to identify future outliers. Our experiments also show that even if the data set used to identify further outliers is contaminated with some outliers, the test can perform succesfully. }, } @TechReport{ ISE-TR-06-02, author = { Jon Whittle }, title = { Specifying Precise Use Cases }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2006 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-06-02 }, abstract = { Despite attempts to formalize the semantics of use cases, they remain an informal notation. The informality of use cases is both a blessing and a curse. Whilst it admits an easy learning curve and enables communication between software stakeholders, it is also a barrier to the application of automated methods for test case generation, validation or simulation. This paper presents a precise way of specifying use cases based on a three-level modeling paradigm strongly influenced by UML. The formal syntax and semantics of use case charts are given, along with an example that illustrates how they can be used in practice. }, } @TechReport{ ISE-TR-06-03, author = { Mats Grindal and Jeff Offutt and Jonas Mellin }, title = { On the Testing Maturity of Software Producing Organizations: Detailed Data }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2006 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-06-03 }, abstract = { This paper presents data from a study of the current state of practice of software testing. Test managers from twelve different software organizations were interviewed. The interviews focused on the amount of resources spent on testing, how the testing is conducted, and the knowledge of the personnel in the test organizations. The data indicate that the overall test maturity is low. Test managers are aware of this but have trouble improving. One problem is that the organizations are commercially successful, suggesting that products must already be ``good enough.'' Also, the current lack of structured testing in practice makes it difficult to quantify the current level of maturity and thereby articulate the potential gain from increasing testing maturity to upper management and developers. }, } @TechReport{ ISE-TR-06-04, author = { Carlotta Domeniconi and Dimitrios Gunopulos and Sheng Ma and Dimitris Papadopoulos and Bojun Yan }, title = { Locally Adaptive Metrics for Clutering High Dimensional Data }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2006 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-06-04 }, abstract = { Clustering suffers from the curse of dimensionality, and similarity functions that use all input features with equal relevance may not be effective. We introduce an algorithm that discovers clusters in subspaces spanned by different combinations of dimensions via local weightings of features. This approach avoids the risk of loss of information encountered in global dimensionality reduction techniques, and does not assume any data distribution model. Our method associates to each cluster a weight vector, whose values capture the relevance of features within the corresponding cluster. We experimentally demonstrate the gain in perfomance our method achieves with respect to competitive methods, using both synthetic and real datasets. In particular, our results show the feasibility of the proposed technique to perform simultaneous clustering of genes and conditions in gene expression data, and clustering of very high dimensional data such as text data. }, } @TechReport{ ISE-TR-06-05, author = { Saket Kaushik and William Winsborough and Duminda Wijesekera and Paul Ammann }, title = { Policy Transformations for Preventing Leakage of Sensitive Information in Email Systems }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2006 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-06-05 }, abstract = { In this paper we identify an undesirable side-effect of combining different email-control mechanisms for protection from unwanted messages, namely, leakage of recipients’ private information to message senders. This is because some email-control mechanisms like bonds, graph-turing tests, etc., inherently leak information, and without discontinuing their use, leakage channels cannot be closed. We formalize the capabilities of an attacker and show how she can launch guessing attacks on recipient’s mail acceptance policy that utilizes leaky mechanism in its defence against unwanted mail. As opposed to the classical Dolev-Yao attacker and its extensions, attacker in our model guesses the contents of a recipient’s private information. The use of leaky mechanisms allow the sender to verify her guess. We assume a constraint logic programming based policy language for specification and evaluation of mail acceptance criteria and present two different program transformations that can prevent guessing attacks while allowing recipients to utilize any email-control mechanism in their policies. }, } @TechReport{ ISE-TR-06-06, author = { Ning Kang and Carlotta Domeniconi and Daniel Barbar{\'a} }, title = { Categorization of Unlabeled Documents driven by Word Weighting }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2006 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-06-06 }, abstract = { In text mining we often have to handle large document collections. The labeling of such large corpuses of documents is too expensive and impractical. Thus, there is a need to develop (unsupervised) clustering techniques for text data, where the distributions of words can vary significantly from one category to another. The vector space model of documents easily leads to a 30000 or more dimensions. In such high dimensionality, the effectiveness of any distance function that equally uses all input features is severely compromised. Furthermore, it is expected that different words may have different degrees of relevance for a given category of documents, and a single word may have a different importance across different categories. In this paper we first propose a global unsupervised feature selection approach for text, based on frequent itemset mining. As a result, each document is represented as a set of words that co-occur frequently in the given corpus of documents. We then introduce a locally adaptive clustering algorithm, designed to estimate (local) word relevance and, simultaneously, to group the documents. We present experimental results to demonstrate the feasibility of our approach. Furthermore, the analysis of the weights credited to terms provide evidence that the identified keywords can guide the process of label assignment to clusters. We take into consideration both spam email filtering and general classification datasets. Our analysis of the distribution of weights in the two cases provides insights on how the spam problem distinguishes from the general classification case. }, } @TechReport{ ISE-TR-06-07, author = { Sasket Kaushik and Csilla Farkas and Duminda Wijesekera and Paul Ammann }, title = { An algebra for composing ontologies }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2006 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-06-07 }, abstract = { Ontologies are used as a means of expressing agreements to a vocabulary shared by a community in a coherent and consistent community members in a decentralized manner. As it happens on the internet, ontologies are created by community members in a decentralized manner, requiring that they be merged before being used by the community. We develop an algabra to do so in the Resource Discription Framework (RDF). To provide formal semantics of the proposed algebraic property names, while ontology C has been composed by ooperators, we type a fragment of the RDF syntax. }, } @TechReport{ ISE-TR-06-08, author = { Saket Kaushik and Duminda Wijesekera and Paul Ammann }, title = { BPEL Orchestration of Secure WebMail }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2006 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-06-08 }, abstract = { Web Services offer an excellent opportunity to redesign and replace old and insecure applications with more flexible and robust ones. WSEmail is one such application that replaces conventional message delivery systems with a family of Web Services that achieve the same goal. In this paper we analyze the existing WSEmail specification against the standard set of use cases (and misuse cases) supported (resp. prevented) by SMTP implementations – the current default message delivery infrastructure – and augment it with several missing pieces. In addition, we show how the WSEmail family of Web Services, specified in WSDL, can be orchestrated using BPEL. Finally, we provide a synchronization analysis of our WSEmail orchestration and show its correctness. }, } @TechReport{ ISE-TR-06-09, author = { Bojun Yan and Carlotta Domeniconi }, title = { Kernel Optimization using Pairwise Constraints for Semi-Supervised Clustering }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2006 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-06-09 }, abstract = { A critical problem related to kernel-based methods is the selection of an optimal kernel for the problem at hand. The kernel function in use must conform with the learning target in order to obtain meaningful results. While solutions to estimate optimal kernel functions and their parameters have been proposed in a supervised setting, the problem presents open challenges when no labeled data are provided, and all we have available is a set of pairwise must-link and cannot-link constraints. In this paper we address the problem of optimizing the kernel function using pairwise constraints for semi-supervised clustering. To this end we derive a new optimization criterion to automatically estimate the optimal parameters of composite Gaussian kernels, directly from the data and the given constraints. We combine the optimal kernel function computed by our technique with a recently introduced semi-supervised kernel-based algorithm to demonstrate experimentally the effectivess of our approach. The results show that our method enables the practical utilization of powerful kernel-based semi-supervised clustering approaches by providing a mechanism to automatically set the involved critical parameters. }, } @TechReport{ ISE-TR-06-10, author = { Mark Hartong and Rajini Goel and Duminda Wijesekera }, title = { Trust-based Secure Positive Train Control ({PTC}) Interoperation }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2006 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-06-10 }, abstract = { Positive Train Control (PTC) is a wireless control system ensuring railroad safety by enforcing train separation, speed enforcement, roadway worker protection and other safety functions. Due to shared track rights over each-other’s tracks in North America, company A’s trains must be safely operated by company B’s crew on company C’s tracks, requiring different PTC systems to securely interoperate with each other. For a security framework to ensure that, we propose using a trust management system with certificates and over the air re-keying (OTAR). Back of the envelope calculations show that our solution meets timing needs of PTC. Index Terms: Security, Rail Transportation Control, SCADA Systems, Cryptography }, } @TechReport{ GMU-CS-TR-2007-1, author = { Adrian Grajdeanu }, title = { Modeling Diffusion in a Discrete Environment }, institution = { Department of Computer Science, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2007 }, howpublished = { Available at http://cs.gmu.edu }, number = { GMU-CS-TR-2007-1 }, abstract = { The present report details a model for implementing diffusion in a discrete space. Formulated in 2004 in support of artificial development experiments, the model is mathematically justified starting from the diffusion equation. It has physical plausibility and handles well different shaped and sized diffusion neighborhoods in the sense that it achieves isotropic diffusion in spite of the bias introduced by the discretizing grid. It provides one formulation that encapsulates the diffusion neighborhood details and renders it applicable to linear, planar, spatial and even n-dimensional constructs. }, } @TechReport{ ISE-TR-07-01, author = { Jon Whittle and Ana Moreira and Jo{\~a}o Ara{\'u}jo and Rasheed Rabbi }, title = { Graphical Composition of State-Dependent Use Case Behavioral Models }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2007 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-07-01 }, abstract = { Maintaining a clear separation of concerns throughout the software lifecycle has long been a goal of the software community. Concerns that are separated, however, must be composed at some point. This paper presents a technique for keeping statedependent use cases separate throughout the software modeling process and a method for composing statedependent use cases. The composition method is based on the graph transformations formalism. This provides a rich yet user-friendly way of composing state dependent use cases that is built on solid foundations. To evaluate our approach, it has been applied to seven student design solutions. Each solution was originally developed using a use case-driven methodology and was reengineered to evaluate whether our technique could have been applied. The findings are that it is possible to maintain the separation of state-dependent use cases during software design and, furthermore, that expressive model composition methods are necessary to do this in practice. }, } @TechReport{ ISE-TR-07-02, author = { Alessandro D'Atri and Amihai Motro }, title = { Evolving VirtuE }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2007 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-07-02 }, abstract = { One of the most attractive aspects of virtual databases is their agility: the inherent ability to adapt and evolve in response to changing market conditions. Evolving VirtuE is a formal framework within which such agility can be realized. Through the concepts of enterprise time, activity logging, and log mining, the recent behavior and performance of an enterprise may be studied, and corresponding evolutionary steps can be induced. These steps may be intended to benefit the operation of individual enterprise members, as well the enterprise as a whole. In addition, we also examine enterprise creation, a period of rapid evolution that concludes when the enterprise reaches stability and begins transacting its business activities. }, } @TechReport{ ISE-TR-07-03, author = { Vijayant Dhankhar and Saket Kaushik and Duminda Wijesekera }, title = { XACML Policies for Exclusive Resource Usage }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2007 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-07-03 }, abstract = { The extensible access control markup language (XACML) is the standard access control policy specification language of the World Wide Web. XACML does not provide exclusive accesses to globally resources, and we do so by enhancing the policy execution framework with locking. }, } @TechReport{ ISE-TR-07-04, author = { Alfredo Cuzzocrea and Alessandro D'Atri and Andrea Gualtieri and Amihai Motro and Domenico Sacc{\`a} }, title = { Grid-VirtuE: A Layered Architecture for Grid Virtual Enterprises }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2007 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-07-04 }, abstract = { A grid virtual enterprise is a community of independent enterprises concerned with a particular sector of the economy. Its members (nodes) are small or medium size enterprises (SME) engaged in bilateral transactions. An important principle of a grid virtual enterprise is the lack of any global “guiding force”, with each member of the community making its own independent decisions. In this paper we describe Grid-VirtuE, a three-layer architecture for grid virtual enterprises. The top layer of the architecture, representing its ultimate purpose, is an environment in which grid virtual enterprises can be modeled and implemented. This layer is supported by middleware infrastructure for grids, providing a host of grid services, such as node-to-node communication, bilateral transactions, and data collection. The bottom layer is essentially a distributed data warehouse for storing, sharing and analyzing the large amounts of data generated by the grid. Among other functionalities, the warehouse handles the dissemination of data among the members of the grid; it confronts issues of data magnitude with an aging mechanism that aggregates old data at a lower level of detail; and it incorporates privacy-preserving features that retain the confidentiality of individual members. Warehouse information is also used for data and process mining, aimed at analyzing the behavior of the enterprise, and subsequently inducing evolutionary changes that will improve its performance. }, } @TechReport{ ISE-TR-07-05, author = { Maurizio Filippone }, title = { Fuzzy Clustering of Patterns Represented by Pairwise Dissimilarities }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2007 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-07-05 }, abstract = { Clustering is the problem of grouping objects on the basis of a similarity measure between them. This paper considers the approaches belonging to the K-means family, in particular those based on fuzzy memberships. When patterns are represented by means of non-metric pairwise dissimilarities, these methods cannot be directly applied, since they are not guaranteed to converge. Symmetrization and shift operations have been proposed, to transform the dissimilarities between patterns from non-metric to metric. It has been shown that they modify the K-means objective function by a constant, that does not influence the optimization procedure. Some fuzzy clustering algorithms have been extended, in order to handle patterns described by means of pairwise dissimilarities. The literature, however, lacks of an explicit analysis on what happens to K-means style fuzzy clustering algorithms, when the dissimilarities are transformed to let them become metric. This paper shows how the objective functions of four clustering algorithms based on fuzzy memberships change, due to dissimilarities transformations. The experimental analysis conducted on a synthetic and a real data set shows the effect of the dissimilarities transformations for four clustering algorithms based on fuzzy memberships. }, } @TechReport{ ISE-TR-07-06, author = { Carlotta Domeniconi and Muna Al-Razagan }, title = { Weighted Cluster Ensembles: Methods and Analysis }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2007 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-07-06 }, abstract = { Cluster ensembles offer a solution to challenges inherent to clustering arising from its ill-posed nature. Cluster ensembles can provide robust and stable solutions by leveraging the consensus across multiple clustering results, while averaging out emergent spurious structures that arise due to the various biases to which each participating algorithm is tuned. In this paper, we address the problem of combining multiple weighted clusters which belong to different subspaces of the input space. We leverage the diversity of the input clusterings in order to generate a consensus partition that is superior to the participating ones. Since we are dealing with weighted clusters, our consensus functions make use of the weight vectors associated with the clusters. We demostrate the effectiveness of our techniques by running experiments with several real datasets, including high dimensional text data. Furthermore, we investigate in depth the issue of diversity and accuracy for our ensemble methods. Our analysis and experimental results show that the proposed techniques are capable of producing a partition that is as good as or better than the best individual clustering. Categories and Subject Descriptors: ... [...]: ... General Terms: Clustering, cluster ensembles, subspace clustering, consensus functions, accuracy and diversity measures, text data. }, } @TechReport{ ISE-TR-07-07, author = { Aybar C. Acar and Amihai Motro }, title = { Query Consolidation: Interpreting a Set of Independent Queries Using a Multidatabase Architecture in the Reverse Direction }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2007 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-07-07 }, abstract = { We introduce the problem of query consolidation, which seeks to interpret a set of disparate queries submitted to independent databases with a single “global” query. This problem has multiple applications, from improving database design to protecting information from a seemingly innocuous set of apparently unrelated queries. The problem exhibits attractive duality with the much-researched problem of query decomposition, which has been addressed intensively in the context of multidatabase environments: How to decompose a query submitted to a virtual database into a set of local queries that are evaluated in individual databases. We set the new problem in the architecture of a canonical multidatabase system, using it in the “reverse direction”. The process incorporates two steps where multiplicity of solutions must be considered: At one point the system must infer the most likely set of equi-joins for a set of relations; at another point it must discover the most likely selection constraints that would be applied to a relation. In each case we develop a procedure that ranks solutions according to their perceived likelihood. The final result is therefore a ranked list of suggested consolidations. }, } @TechReport{ ISE-TR-04-01, author = { Ye Wu, Jeff Offutt and Xiaochen Du }, title = { Modeling and Testing of Dynamic Aspects of Web Applications }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2004 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-04-01 }, abstract = { Web software applications have become complex, sophisticated programs that are based on novel computing technologies. Although powerful, these technologies bring new challenges to developers and testers. Checking static HTML links is no longer suffcient; Web applications must be evaluated as complex software products. This paper focuses on two unique aspects of Web applications, an extremely loose form of coupling that features dynamic integration and the ability that users have to directly change the potential flow of execution. Taken together, these allow the control flow to vary with each execution, and means the possible control flows cannot be determined statically. Thus we cannot perform standard analysis techniques that are fundamental to many software engineering activities. This paper presents a new theoretical model of new couplings and the dynamic flow of control of Web applications. This model is based on atomic sections, which allow tools to build the equivalent of a control flow graph for Web applications. The model is used to propose new test criteria for Web applications, and results are shown from a case study on a moderate-size application. }, } @TechReport{ ISE-TR-04-02, author = { Randy Howard and Larry Kerschberg }, title = { A Framework for Dynamic Semantic Web Services Management }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2004 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-04-02 }, abstract = { The concept of Web services as a means of dynamically discovering, negotiating, composing, executing and managing services to materialize enterprise-scale workflow is an active research topic. However its realization has thus far been elusive. Existing approaches involve many disparate concepts, frameworks and technologies. What is needed is a comprehensive and overarching framework that handles the processing and workflow requirements of Virtual Enterprises, maps them to a collection of service-oriented tasks, dynamically configures these tasks from available services, and manages the choreography and execution of these services. The goal is to add semantics to Web services to endow them with capabilities currently lacking in the literature, but necessary for their successful deployment in future systems. This paper introduces such a framework, called the Knowledge-based Dynamic Semantic Web Services (KDSWS) Framework, that addresses in an integrated end-to-end manner, the life-cycle of activities involved in preparing, publishing, requesting, discovering, selecting, configuring, deploying, and delivering Semantic Web Services. In particular, the following issues are addressed: 1) semantic specification of services capabilities including quality of service, trust, and security; 2) transaction control and workflow management; and 3) resource management, interoperation and evolution of the Virtual Enterprise. Two models and their associated languages are introduced to specify the features for these enhanced services: 1) the KDSWS Meta-Model, and 2) the KDSWS Process Language. Both are based on the Knowledge/Data Model. This integrated and comprehensive approach provides a unified end-to-end approach to enable dynamically-composable, semantically-rich, service-oriented systems of Web services. }, } @TechReport{ ISE-TR-04-03, author = { Aynur Abdurazik and Jeff Offutt and Andrea Baldini }, title = { A Controlled Experimental Evaluation of Test Cases Generated from UML Diagrams }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2004 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-04-03 }, abstract = { This paper presents a single project experiment on the fault revealing capabilities of test sets that are generated from UML statecharts and sequence diagrams. The results of this experiment show that the statechart test sets do better at revealing unit level faults than the sequence diagram test sets, and the sequence diagram test sets do better at revealing integration level faults than the statechart test sets. The experimental data also show that the statecharts result in more test cases than the sequence diagrams. The experiment showed that UML diagrams can be used in generating test data systematically, and different UML diagrams can play different roles in testing. }, } @TechReport{ ISE-TR-04-04, author = { Leonard Gallagher and Jeff Offutt }, title = { Integration Testing of Object-oriented Components Using FSMS: Theory and Experimental Details }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2004 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-04-04 }, abstract = { In object-oriented terms, one of the goals of integration testing is to ensure that messages from objects in one class or component are sent and received in the proper order and have the intended effect on the state of external objects that receive the messages. This research extends an existing single-class testing technique to integration testing. The previous method models the behavior of a single class as a finite state machine, transforms that representation into a data flow graph that explicitly identifies the definitions and uses of each state variable of the class, and then applies conventional data flow testing to produce test case specifications that can be used to test the class. This paper extends those ideas to inter-class testing by developing flow graphs and tests for an arbitrary number of classes and components. It introduces flexible representations for message sending and receiving among objects and allows concurrency among any or all classes and components. A second major result is the introduction of a novel approach to performing data flow analysis. Data flow graphs are stored in a relational database, and database queries are used to gather def-use information. This approach is conceptually simple, mathematically precise, quite powerful, and general enough to be used for traditional data flow analysis. This testing approach relies on finite state machines, database modeling and processing techniques, and algorithms for analysis and traversal of directed graphs. A proof-of-concept implementation is used to illustrate how the approach works on an extended example. }, } @TechReport{ ISE-TR-04-05, author = { Mats Grindal and Jeff Offutt and Sten F. Andler }, title = { Combination Testing Strategies: A Survey }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2004 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-04-05 }, abstract = { Combination strategies are test-case selection methods where test cases are identified by combining values of the different test object input parameters based on some combinatorial strategy. This survey presents 15 different combination strategies, and covers more than 30 papers that focus on one or several combination strategies. We believe this collection represents most of the existing work performed on combination strategies. This survey describes the basic algorithms used by the combination strategies. Some properties of combination strategies, including coverage criteria and theoretical bounds on the size of test suites, are also included in this description. This survey also includes a subsumption hierarchy that attempts to relate the various coverage criteria associated with the identified combination strategies. Finally, this survey contains short summaries of all the papers that cover combination strategies. }, } @TechReport{ ISE-TR-03-01, author = { Sencun Zhu and Shouhuai Xu and Sanjeev Setia and Sushil Jajodia }, title = { Establishing Pair-wise Keys For Secure Communication in Ad Hoc Networks: A Probabilistic Approach }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-03-01 }, abstract = { We present a scalable distributed protocol that enables two mobile nodes in an ad hoc network to establish a pairwise shared key on the fly, without requiring the use of a on-line key distribution center. The design of our protocol is based on a novel combination of two techniques -- probabilistic key sharing and threshold secret sharing. Our protocol is scalable since every node only needs to possess a small number of keys, independent of the network size, and it is computationally efficient because it only relies on symmetric key operations. We show that a pairwise key established between two nodes using our protocol is secure against a collusive attack by a certain number of compromised nodes, and that our protocol can be parameterized to meet the appropriate levels of performance, security and storage for the application under consideration. }, } @TechReport{ ISE-TR-03-02, author = { Sencun Zhu and Sanjeev Setia and Sushil Jajodia }, title = { Adding Reliable and Self-Healing Key Distribution to the Subset Difference Group Rekeying Method for Secure Multicast }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-03-02 }, abstract = { The Subset Difference Rekeying (SDR) method is the most efficient stateless group rekeying method proposed in the literature. We study two important issues related to the SDR method. First, we address the issue of reliable rekey transport for SDR. We present a key distribution scheme, called FEC-BKR, that enables members to receive the current group key in a reliable and timely fashion despite packet losses in the network. Through simulation, we show that in most scenarios, FEC-BKR outperforms previously proposed schemes for reliable rekey transport. Second, we address the issue of self-healing key distribution for SDR. We present a group key recovery scheme that adds the self-healing property to SDR, i.e., our scheme enables a member that has missed up to a certain number m of previous rekey operations to recover the missing group keys without asking the key server for retransmission. The additional communication overhead imposed by our key recovery scheme is quite small (less than 3m additional keys). }, } @TechReport{ ISE-TR-03-03, author = { Lingyu Wang and Yingjiu Li and Duminda Wijesekera and Sushil Jajodia }, title = { Precisely Answering Multi-dimensional Range Queries Without Privacy Breaches }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-03-03 }, abstract = { This paper investigates the privacy breaches caused by multi-dimensional range (MDR) sum queries in OLAP systems. We show that existing inference control methods are generally ineffective or infeasible for MDR queries. We then consider restricting users to even MDR queries (that is, the MDR queries involving even number of data values). We show that the collection of such even MDR queries is safe if and only if a special set of sum-two queries (that is, queries involving exactly two values) is safe. On the basis of this result, we give an efficient method to decide the safety of even MDR queries. Besides safe even MDR queries we show that any odd MDR query is unsafe. Moreover, any such odd MDR query is different from the union of some even MDR queries by only one tuple. We also extend those results to the safe subsets of unsafe even MDR queries. }, } @TechReport{ ISE-TR-03-04, author = { Shiping Chen and Duminda Wijesekera and Sushil Jajodia }, title = { FlexFlow: A Flexible Flow Control Policy Specification Framework }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-03-04 }, abstract = { Flow control policies are important in data-flow, work-flow, transaction systems and software design. Previous work in this area concentrates either on modelling security aspects of information flow control or applying flow control policies in some specific application domain. These models permit either permissions or prohibitions for flows and normally are based on a specific meta-policy (usually the closed policy). We propose FlexFlow, a logic based flexible flow control framework to specify data-flow, work-flow and transaction systems policies that go beyond point-to-point flows. Both permissions and prohibitions are specifiable in FlexFlow and meta-policies such as permissions take precedence themselves can be specified over the meta-policy neutral policy specification environment of FlexFlow. We further show how to specify and prevent inter-flow conflicts such as those arising in role-based work-flow policies. }, } @TechReport{ ISE-TR-03-05, author = { Lingyu Wang and Duminda Wijesekera and Sushil Jajodia }, title = { {OLAP} Means On-Line Anti-Privacy }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-03-05 }, abstract = { In this paper we investigate the privacy breaches caused by multi-dimensional range sum queries in OLAP (Online Analytic Processing) systems. Our results show that the sensitive information stored in the underlying data warehouses can be easily compromised by malicious users with legitimate range queries. This compromise is possible even when users are restricted to some special class of range queries. We present algorithms that compromise individual tuples with only range queries. We study the conditions under which the compromises are possible. We also analyze the number of queries required for each compromise, as well as the complexity and completeness of those algorithms. Our study reveals the seriousness of the privacy issue in OLAP systems and provide better understanding of the problem for further research on the control methods. }, } @TechReport{ ISE-TR-03-06, author = { Philipp Anokhin and Amihai Motro }, title = { Fusionplex: Resolution of Data Inconsistencies in the Integration of Heterogeneous Information Sources }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-03-06 }, abstract = { Fusionplex is a system for integrating multiple heterogeneous and autonomous information sources, that uses data fusion to resolve factual inconsistencies among the individual sources. To accomplish this, the system relies on source features, which are meta-data on the quality of each information source; for example, the recentness of the data, its accuracy, its availability, or its cost. The fusion process is controlled with several parameters: (1) With a vector of feature weights, each user defines an individual notion of data utility; (2) with thresholds of acceptance, users ensure minimal performance of their data, excluding from the fusion process data that are too old, too costly, or lacking in authority, or data that are too high, too low, or obvious outliers; and, ultimately, (3) in naming a particular fusion function to be used for each attribute (for example, average, maximum, or simply any) users implement their own interpretation of fusion. Several simple extensions to SQL are all that is needed to allow users to state these resolution parameters, thus ensuring that the system is easy to use. Altogether, Fusionplex provides its users with powerful and flexible, yet simple, control over the fusion process. In addition, Fusionplex supports other critical integration requirements, such as information source heterogeneity, dynamic evolution of the information environment, quick ad-hoc integration, and intermittent source availability. The methods described in this paper were implemented in a prototype system that provides complete Web-based integration services for remote clients. }, } @TechReport{ ISE-TR-03-07, author = { Amihai Motro, Philipp Anokhin and Aybar C. Acar }, title = { Utility-based Resolution of Data Inconsistencies }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-03-07 }, abstract = { A virtual database system is software that provides unified access to multiple information sources. If the sources are overlapping in their contents and independently maintained, then the likelihood of inconsistent answers is high. Solutions are often based on ranking (which sorts the different answers according to recurrence) and on fusion (which synthesizes a new value from the different alternatives according to a specific formula). In this paper we argue that both methods are flawed, and we offer alternative solutions that are based on knowledge about the performance of the source data; including features such as recentness, availability, accuracy and cost. These features are combined in a flexible utility function that expresses the overall value of a data item to the user. Utility allows us to (1) define meaningful ranking on the inconsistent set of answers, and offer the top-ranked answer as a preferred answer; (2) determine whether a fusion value is indeed better than the initial values, by calculating its utility and comparing it to the utilities of the initial values; and (3) discover the best fusion: the fusion formula that optimizes the utility. The advantages of such performance-based and utility-driven ranking and fusion are considerable. }, } @TechReport{ ISE-TR-03-08, author = { Mohamad Sharif and Duminda Wijesekera }, title = { Providing Voice Privacy as a Service over the Public Telephone Network }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-03-08 }, abstract = { Existing design of the public telephone network is susceptible to eavesdropping on its voice stream. Consequently, any wire taper can listen to supposedly private conversations. In order to prevent such eavesdropping, we propose an architecture and its implementation for end-to-end voice encryption as a service available for interested subscribers. Our proposal consists of appropriately placing servers to authenticate telephone equipment and subscribers of the service, and certificate authorities that can cross-certify over telecommunication service providers. We show how these entities and necessary signaling mechanisms between them can be implemented using the transaction capabilities application layer (TCAP) of the signal system seven (SS7) protocol and the D channel of the digital subscriber line (DSL) connecting telephone equipment to the SS7 grid. Using published specifications, we show that the duration to initiate an encrypted telephone call takes about 19 seconds including user authentication under average load conditions. That makes our design and protocols comparable to existing intelligent services provided over public telephones and only four times larger that initiating a normal telephone call. }, } @TechReport{ ISE-TR-03-09, author = { Aybar C. Acar and Amihai Motro }, title = { Why Is this User Asking so Many Questions? Explaining Sequences of Queries }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-03-09 }, abstract = { A sequence of queries submitted by a database user within a short period of time may have a single, illuminating explanation. In this paper we consider sequences of single-record queries, and attempt to guess what information their authors may be trying to accumulate. Query sequences may reflect clandestine intentions, where users attempt to avoid direct queries which may disclose their true interests, preferring instead to obtain the same information by means of sequences of smaller, less conspicuous, queries. Sequences of queries may also reflect attempts to circumvent retrieval restrictions, where users attempt to approximate information which is inaccessible, with sequences of legitimate requests (in the latter case, our explanations may lead database owners to either tighten access, or, conversely, to reorganize their interfaces to facilitate access). Because the true objective of a sequence may be clouded by the retrieval of spurious records, our approach considers all the possible aggregates that a user may accumulate with a sequence, and to rank them, search-engine style, according to their plausibility as retrieval objectives. Our method is probabilistic in nature and postulates that the likelihood that a set of records is the true objective of the user is inverse proportional to the likelihood that this set results from random selection. Our method is shown to have good performance even in the presence of noise (spurious records) as high as 40-50%. }, } @TechReport{ ISE-TR-03-10, author = { Manuel Koch and Francesco Parisi-Presicce }, title = { UML Specification of Access Control Policies and their Formal Verification }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-03-10 }, abstract = { Security requirements have become an integral part of most modern software systems. In order to produce secure systems, it is necessary to provide software engineers with the appropriate systematic support. We propose a methodology to integrate the specification of access control policies into UML and provide a graph-based formal semantics for the UML access control specification which permits to reason about the coherence of the access control specification. The main concepts in the UML access control specification are illustrated with an example access control model for distributed object systems. }, } @TechReport{ ISE-TR-03-11, author = { Paolo Bottoni and Francesco Parisi-Presicce and Gabriele Taentzer }, title = { Specifying Coherent Refactoring of Software Artefacts with Distributed Graph Transformations }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2003 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-03-11 }, abstract = { Refactoring is an important source of software transformation, which changes the internal structure of a software system, while preserving its behavior. Even though the input/output view of a system's behavior does not change, refactoring can have several consequences for the computing process, as expressed for instance by the sequence of method calls or by state changes of an object or an activity. Such modifications must be reflected in the system model, generally expressed through UML diagrams. We propose a formal approach, based on distributed graph transformation, to the coordinated evolution of code and model, as effect of refactorings. The approach can be integrated into existing refactoring tools. Due to its formal base, it makes it possible to reason about the behavior preservation of each specified refactoring. }, } @TechReport{ ISE-TR-02-01, author = { Claudio Bettini and Sushil Jajodia and X. Sean Wang and Duminda Wijesekera }, title = { Provisions and Obligations in Policy Management and Security Applications }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2002 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-02-01 }, abstract = { Policies are widely used in many different systems and applications. Recently, it has been recognized that a ``yes/no'' response to every scenario is just not enough for many modern systems and applications. Many policies require certain conditions to be satisfied and actions to be performed before or after a decision is made in accordance with the policy. To address this need, this paper introduces the notions of provisions and obligations. Provisions are those conditions that need to be satisfied or actions that must be performed before a decision is rendered, while obligations are those conditions or actions that must be fulfilled by either the users or the system after the decision. This paper formalizes a rule-based policy framework that includes provisions and obligations, and investigates a reasoning mechanism within this framework. A policy decision may be supported by more than one derivation, each associated with a potentially different set of provisions and obligations (called a global PO set). The reasoning mechanism can derive all the global PO sets for each specific policy decision, and facilitates the selection of the best one based on numerical weights assigned to provisions and obligations as well as on semantic relationships among them. The paper also shows the use of the proposed policy framework in a security application and discusses through an example various aspects of how the system may compensate unfulfilled obligations. }, } @TechReport{ ISE-TR-02-02, author = { Sanjeev Setia and Sencun Zhu and Sushil Jajodia }, title = { A Scalable and Reliable Key Distribution Protocol for Multicast Group Rekeying }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2002 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-02-02 }, abstract = { Scalable group rekeying is one of the important problems that needs to be addressed in order to support secure communications for large and dynamic groups. One of the challenging issues that arises in scalable group rekeying is the problem of delivering the updated keys to the members of the group in a reliable and timely manner. In this paper, we present a new scalable and reliable key distribution protocol for group key management schemes that use logical key hierarchies for scalable group rekeying. Our protocol, called WKA-BKR, is based upon two key ideas, weighted key assignment and batched key retransmission, both of which exploit the special properties of logical key hierarchies to reduce the overhead and increase the reliability of the key delivery protocol. We have evaluated the performance of our approach using detailed simulations. Our results show that for most network loss scenarios, the bandwidth used by our protocol is lower than that of previously proposed key delivery protocols. }, } @TechReport{ ISE-TR-02-03, author = { Jacob Berlin and Amihai Motro }, title = { TupleRank: Ranking Discovered Content in Virtual Databases }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2002 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-02-03 }, abstract = { Recently, the problem of data integration has been newly addressed by methods based on machine learning and discovery. Such methods are intended to automate, at least in part, the laborious process of information integration, by which existing data sources are incorporated in a virtual database. Essentially, these methods scan new data sources, attempting to discover possible mappings to the virtual database. Like all discovery processes, this process is intrinsically probabilistic; that is, each discovery is associated with a specific value that denotes assurance of its appropriateness. Consequently, the rows in a discovered virtual table have mixed assurance levels, with some rows being more credible than others. In this paper we argue that rows in discovered virtual databases should be ranked, and we describe a ranking method, called TupleRank, for calculating such a ranking order. Roughly speaking, TupleRank calibrates the probabilities calculated during a discovery process with historical information about the performance of the system. The work is done in the framework of the Autoplex system for discovering content for virtual databases, and initial experimentation is reported and discussed. }, } @TechReport{ ISE-TR-02-04, author = { Lingyu Wang and Duminda Wijesekera and Sushil Jajodia }, title = { Cardinality-based Inference Control in Sum-only Data Cubes (Extended Version) }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2002 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-02-04 }, abstract = { This paper deals with the inference problems in data warehouses and decision support systems such as on-line analytical processing (OLAP)systems. Even though OLAP systems restrict user accesses to predefined aggregations, the possibility of inappropriate disclosure of sensitive attribute values still exists. Based on a definition of non- compromiseability to mean that any member of a set of variables satisfying a given set of their aggregates can have more than one value, we derive sufficient conditions for non-compromiseability in sum-only data cubes. Specifically, (1) the non-compromiseability of multi-dimensional aggregates can be reduced to that of one dimensional aggregates, (2) full or dense core cuboids are non-compromiseable, and (3) there is a tight lower bound for the cardinality of a core cuboid to remain non- compromiseable. Based on those conditions, and a three-tiered model for controlling inferences, we provide a divide-and-conquer algorithm that uniformly divides data sets into chunks and builds a data cube on each such chunk. The union of those data cubes are then used to provide users with inference-free OLAP queries. }, } @TechReport{ ISE-TR-02-05, author = { Sanjeev Setia Sencun Zhu and Sushil Jajodia }, title = { A Comparative Performance Analysis of Reliable Group Rekey Transport Protocols for Secure Multicast }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2002 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-02-05 }, abstract = { In this paper, we present a new scalable and reliable key distribution protocol for group key management schemes that use logical key hierarchies for scalable group rekeying. Our protocol, called WKA-BKR, is based upon two ideas -- weighted key assignment and batched key retransmission -- both of which exploit the special properties of logical key hierarchies and the group rekey transport payload to reduce the bandwidth overhead of the reliable key delivery protocol. Using both analytic modeling and simulation, we compare the performance of WKA-BKR with that of other rekey transport protocols, including a recently proposed protocol based on proactive FEC. Our results show that for most network loss scenarios, the bandwidth used by WKA-BKR is lower than that of the other protocols. }, } @TechReport{ ISE-TR-02-06, author = { Sencun Zhu Sanjeev Setia and Sushil Jajodia }, title = { Performance Optimizations for Group Key Management Schemes for Secure Multicast }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2002 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-02-06 }, abstract = { Scalable group rekeying is one of the biggest challenges that need to be addressed to support secure communications for large and dynamic groups. In recent years, many group key management approaches based on the use of logical key trees have been proposed to address this issue. Using logical key trees reduces the complexity of group rekeying operation from O(N) to O(log N), where N is the group size. In this paper, we present two optimizations for logical key tree organizations that utilize information about the characteristics of group members to further reduce the overhead of group rekeying. First, we propose a partitioned key tree organization that exploits the temporal patterns of group member joins and departures to reduce the overhead of rekeying. Using an analytic model, we show that our optimization can achieve up to 31.4% reduction in key server bandwidth overhead over the un-optimized scheme. Second, we propose an approach under which the key tree is organized based on the loss probabilities of group members. Our analysis shows this optimization can reduce the rekeying overhead by up to 12.1%. }, } @TechReport{ ISE-TR-02-07, author = { Ye Wu and Jeff Offutt }, title = { Maintaining Evolving Component-Based Software with UML }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2002 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-02-07 }, abstract = { Component-based software engineering has been increasingly adopted for software development. This approach relies on using reusable components as the building blocks for constructing software. On the one hand, this helps improve software quality and productivity; on the other hand, it necessitates frequent maintenance activities, such as upgrading third party components or adding new features. The cost of maintenance for conventional software can account for as much as two-thirds of the total cost, and it is likely to be more for component-based software. Thus, an effective maintenance technique for component-based software is strongly desired. This paper presents a UML-based technique that attempts to help resolve difficulties introduced by the implementation transparent characteristics of component-based software systems. This technique can also be useful for other maintenance activities. For corrective maintenance activities, the technique starts with UML diagrams that represent changes to a component, and uses them to support regression testing. To accommodate this approach for perfective maintenance activities, more challenges are encountered. We provide a UML-based framework to evaluate the similarities of the old and new components, and corresponding retesting strategies are provided. }, } @TechReport{ ISE-TR-02-08, author = { Ye Wu and Jeff Offutt }, title = { Modeling and Testing Web-based Applications }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2002 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-02-08 }, abstract = { The Internet is quietly becoming the body of the business world, with web applications as the brains. This means that software faults in web applications have potentially disastrous consequences. Most work on web applications has been on making them more powerful, but relatively little has been done to ensure their quality. Important quality attributes for web applications include reliability, availability, interoperability and security. Web applications share some characteristics of client-server, distributed, and traditional programs, however there are a number of novel aspects of web applications. These include the fact that web applications are ``dynamic'', due to factors such as the frequent changes of the application requirement as well as dramatic changes of the web technologies, the fact that the roles of the clients and servers change dynamically, the heterogeneity of the hardware and software components, the extremely loose coupling and dynamic integration, and the ability of the user to directly affect the control of execution. In this paper, we first analyze the distinct features of web-based applications, and then define a generic analysis model that characterizes the typical behaviors of web-based applications independently of different technologies. Based on this analysis, a family of testing techniques is discussed to ensure different level of quality control of web applications under various situations. }, } @TechReport{ ISE-TR-02-09, author = { Sencun Zhu and Shouhuai Xu and Sanjeev Setia and Sushil Jajodia }, title = { LHAP: A Lightweight Hop-by-Hop Authentication Protocol For Ad-Hoc Networks }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2002 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-02-09 }, abstract = { Most ad hoc networks do not implement any network access control, leaving these networks vulnerable to resource consumption attacks where a malicious node injects packets into the network with the goal of depleting the resources of the nodes relaying the packets. To thwart or prevent such attacks, it is necessary to employ authentication mechanisms that ensure that only authorized nodes can inject traffic into the network. In this paper, we present LHAP, a scalable and light-weight authentication protocol for ad hoc networks. LHAP is based on two techniques: (i) hop-by-hop authentication for verifying the authenticity of all the packets transmitted in the network and (ii) one-way key chain and TESLA for packet authentication and for reducing the overhead for establishing trust among nodes. We analyze the security properties of LHAP. Furthermore, our performance analysis shows that LHAP is a very lightweight security protocol. }, } @TechReport{ ISE-TR-01-01, author = { Alexander Brodsky and Victor E. Segal }, title = { Large Constraint Joins and Disjoint Decompositions }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2001 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-01-01 }, abstract = { This paper is devoted to the problem of evaluation of a conjunction of N disjunctions of linear constraints in a multidimensional space (constraint join), which is a common constraint database operation. Existing methods are exponential in N. Here combinatorial geometry methods are applied to establish conditions for a polynomial size of the join output. A polynomial method to compute a join for an arbitrary input is then given. As part of the solution, a new algorithm for convex decomposition of an arbitrary non-convex polyhedron is developed. The H-tree is proposed - a new data structure combining constraint storage and indexing. H-tree and R-tree implementations of the algorithm are given, and analytical considerations regarding the performance are provided. }, } @TechReport{ ISE-TR-01-02, author = { Alessandro D'Atri and Amihai Motro }, title = { VirtuE: Virtual Enterprises for Information Markets }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2001 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-01-02 }, abstract = { An essential part of a modern economy is an information market. In this market, information products are being traded in countless ways. Information is bought, modified, integrated, incorporated into other products and then sold again. Usually, the production of an information product requires the collaboration of several participants. A virtual enterprise is a community of business entities that collaborate on the manufacturing of new products. This collaboration is often ad hoc, for a specific product only, after which the virtual enterprise may dismantle. The virtual enterprise paradigm is particularly appealing for modeling collaborations for manufacturing information products, and in this paper we present a new model, called VirtuE, for modeling such activities. }, } @TechReport{ ISE-TR-01-03, author = { Jeff Offutt }, title = { Generating Test Data From Requirements/Specifications: Phase IV Final Report }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, number = { ISE-TR-01-01 }, abstract = { This paper is devoted to the problem of evaluation of a conjunction of N disjunctions of linear constraints in a multidimensional space (constraint join), which is a common constraint database operation. Existing methods are exponential in N. Here combinatorial geometry methods are applied to establish conditions for a polynomial size of the join output. A polynomial method to compute a join for an arbitrary input is then given. As part of the solution, a new algorithm for convex decomposition of an arbitrary non-convex polyhedron is developed. The H-tree is proposed - a new data structure combining constraint storage and indexing. H-tree and R-tree implementations of the algorithm are given, and analytical considerations regarding the performance are provided. }, } @TechReport{ ISE-TR-01-02, author = { Alessandro D'Atri and Amihai Motro }, title = { VirtuE: Virtual Enterprises for Information Markets }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2001 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-01-02 }, abstract = { An essential part of a modern economy is an information market. In this market, information products are being traded in countless ways. Information is bought, modified, integrated, incorporated into other products and then sold again. Usually, the production of an information product requires the collaboration of several participants. A virtual enterprise is a community of business entities that collaborate on the manufacturing of new products. This collaboration is often ad hoc, for a specific product only, after which the virtual enterprise may dismantle. The virtual enterprise paradigm is particularly appealing for modeling collaborations for manufacturing information products, and in this paper we present a new model, called VirtuE, for modeling such activities. }, } @TechReport{ ISE-TR-01-03, author = { Jeff Offutt }, title = { Generating Test Data From Requirements/Specifications: Phase IV Final Report }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2001 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-01-03 }, abstract = { This report presents results for the Rockwell Collins Inc. sponsored project on generating test data from requirements/specifications, which started January 1, 2000. The purpose of this project is to improve our ability to test software that needs to be highly reliable by developing formal techniques for generating test cases from formal specificational descriptions of the software. Formal specifications give software developers the opportunity to describe exactly what services the software should provide, providing information to software designers in a clear, unambiguous manner. Formal specifications give test engineers information about the expectations of the software in a form that can be automatically manipulated. This Phase IV, 2000 report presents progress on an empirical evaluation of the efficacy of the transition-pair criterion developed in previous years. Transition-pair tests have been developed for the Flight Guidance System (FGS) and they were run on the faulty versions of FGS developed last year. These tests and the results are summarized in this preliminary report. This report also presents progress in our test data generation tool. This tool has been significantly expanded to handle multiple SCR tables, recursively defined tables, event and condition tables, non-boolean variables, and multiple-variable expressions. It is integrated with the Naval Research Laboratory's SCRTool and Rational Software Corporation's Rational Rose tool. }, } @TechReport{ ISE-TR-01-04, author = { Stephen R. Schach and Bo Jin and David R. Wright and Gillian Z. Heller and A. Jefferson Offutt }, title = { Maintainability of the Linux Kernel }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2001 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-01-04 }, abstract = { We have examined 365 versions of Linux. For every version, we counted the number of instances of common (global) coupling between each of the 17 kernel modules and all the other modules in that version of Linux. We found that the number of instances of common coupling grows exponentially with version number. This result is significant at the 99.99% level, and no additional variables are needed to explain this increase. On the other hand, the number of lines of code in each kernel modules grows only linearly with version number. We conclude that, unless Linux is restructured with a bare minimum of common coupling, the dependencies induced by common coupling will, at some future date, make Linux exceedingly hard to maintain without inducing regression faults. }, } @TechReport{ ISE-TR-01-05, author = { Changzhou Wang and X. Sean Wang }, title = { Covering Bitmap with Trapezoids is Hard }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2001 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-01-05 }, abstract = { Each sub-series of a time series can be mapped to a point on a plane whose two coordinates represent the starting time and the ending time, respectively. In certain applications, points with similar properties are likely to be clustered in a special trapezoid shape. To save these points in a compact way, the following optimization problem needs to be studied: find the minimum number of trapezoids which cover a given set of points. This paper formalizes the optimization problem as a decision problem, namely, Bitmap Cover with special Trapezoids (BCT), and proves that BCT is NP-hard. }, } @TechReport{ ISE-TR-01-06, author = { Jacob Berlin and Amihai Motro }, title = { Database Schema Matching Using Machine Learning with Feature Selection }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2001 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-01-06 }, abstract = { Schema matching, the problem of finding mappings between the attributes of two semantically related database schemas, is an important aspect of many database applications such as schema integration, data warehousing, and electronic commerce. Unfortunately, schema matching remains largely a manual, labor-intensive process. Furthermore, the effort required is typically linear in the number of schemas to be matched; the next pair of schemas to match is not any easier than the previous pair. In this paper we describe a system, called Automatch, that uses machine learning techniques to automate schema matching. Based primarily on Bayesian learning, the system acquires probabilistic knowledge from examples that have been provided by domain experts. This knowledge is stored in a knowledge base called the attribute dictionary. When presented with a pair of new schemas that need to be matched (and their corresponding database instances), Automatch uses the attribute dictionary to find an optimal matching. We also report initial results from the Automatch project. }, } @TechReport{ ISE-TR-00-01, author = { Aynur Abdurazik }, title = { Suitability of the UML as an Architecture Description Language with Applications to Testing }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2000 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-00-01 }, abstract = { Increasingly, very high level designs of large software systems are being described by software architectures. A software architecture expresses the overall structure of the system in an abstract, structured way. The Unified Modeling Language (UML) is widely used to express mid- and low-level designs of software, and recent proposals have been made to adapt the UML for use as an architecture design language (ADL). This research is looking into problems associated with creating system-level software tests that are based on architecture descriptions. This paper discusses issues with using the UML as an ADL, and general problems with then using the architecture descriptions as a basis for generating tests. }, } @TechReport{ ISE-TR-00-02, author = { Jeff Offutt }, title = { Generating Test Data From Requirements/Specifications: Phase III Final Report }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2000 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-00-02 }, abstract = { This report presents results for the Rockwell Collins Inc. sponsored project on generating test data from requirements/specifications, which started January 1, 1999. The purpose of this project is to improve our ability to test software that needs to be highly reliable by developing formal techniques for generating test cases from formal specificational descriptions of the software. Formal specifications represent a significant opportunity for testing because they precisely describe what functions the software is supposed to provide in a form that can be easily manipulated by automated means. This Phase III, 1999 report presents results from an empirical evaluation of the full predicate specification-based testing criterion developed during the first two phases of this project, and a proof-of-concept test data generation tool. The evaluation used a comparative study on a large industrial system, a research version of the Flight Guidance Mode Logic System (FGS) provided by Rockwell Collins. Full predicate tests were generated for FGS, and compared against the T-Vec generation scheme. T-Vec tests for FGS were also provided by Rockwell Collins. While creating and running the tests, one problem was found in the SCR specifications for FGS, and one problem was found in the already well tested implementation of FGS. Both T-Vec and the full predicate tests found almost the same number of faults, but T-Vec required more than five times as many tests, thus the full predicate tests were more efficient. The proof-of-concept test data generator creates full predicate and transition-pair tests from an SCR specification. Currently, the tool requires the SCR specification to be a single mode transition table, all variables must be boolean, and the transition predicates must be single-variable expressions. }, } @TechReport{ ISE-TR-00-03, author = { Jacob Berlin and Amihai Motro }, title = { Autoplex: Automated Discovery of Contents for Virtual Databases }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 2000 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-00-01 }, abstract = { Most virtual database systems are suitable for environments in which the set of member information sources is small and stable. Consequently, present virtual database systems do not scale up very well. The main reason is the complexity and cost of incorporating new information sources into the virtual database. In this paper we describe a system, called Autoplex, which uses machine learning techniques for automating the discovery of new content for virtual database systems. Autoplex assumes that several information sources have already been incorporated (``mapped'') into the virtual database system by human experts (as done in standard virtual database systems). Autoplex learns the features of these examples. It then applies this knowledge to new candidate sources, trying to infer views that ``resemble'' the examples. In this paper we report initial results from the Autoplex project. }, } @TechReport{ ISE-TR-99-01, author = { Jeff Offutt }, title = { Generating Test Data From Requirements/Specifications: Phase II Final Report }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1999 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-99-01 }, abstract = { This report presents results for the Rockwell Collins Inc. sponsored project on generating test data from requirements/specifications, which started January 1, 1998. The purpose of this project is to improve our ability to test software that needs to be highly reliable by developing formal techniques for generating test cases from formal specificational descriptions of the software. Formal specifications represent a significant opportunity for testing because they precisely describe what functions the software is supposed to provide in a form that can be easily manipulated by automated means. This Phase II, 1998 report presents results and strategies for practically applying test cases generated according to the criteria presented in the Phase I, 1997 report. This report presents a small empirical evaluation of the test criteria, and algorithms for solving various problems that arise when applying test cases developed from requirements/specifications. One significant problem in specification-based test data generation is that of reaching the proper program state necessary to execute a particular test case. Given a test case that must start in a particular state S, the test case prefix is a sequence of inputs that will put the software into state S. We have addressed this problem in two ways. First is to combine various test cases to be run in test sequences that are ordered in such a way that each test case leaves the software in the state necessary to run the subsequent test case. An algorithm is presented that attempts to find test case sequences that are optimal in the sense that the fewest possible number of test cases are used. To handle situations where it is desired to run each test case independently, an algorithm for directly deriving test sequences is presented. This report also presents procedures for removing redundant test case values, and develops the idea of ``sequence-pair'' testing, which was presented in the 1997 Phase I report, into a more general idea of ``interaction-pair'' testing. }, } @TechReport{ ISE-TR-99-02, author = { Daniel Barbar{\'a} and Xintao Wu }, title = { Using Approximations to Scale Exploratory Data Analysis in Datacubes }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1999 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-99-02 }, abstract = { Exploratory Data Analysis is a widely used technique to determine which factors have the most influence on data values in a multi-way table, or which cells in the table can be considered anomalous with respect to the other cells. In particular, median polish is a simple, yet robust method to perform Exploratory Data Analysis. Median polish is resistant to holes in the table (cells that have no values), but it may require a lot of iterations through the data. This factor makes it difficult to apply to large multidimensional tables, since the I/O requirements may be prohibitive. This paper describes a technique that uses median polish over an approximation of a datacube, easing the burden of I/O. The results obtained are tested for quality, using a variety of measures. The technique scales to large datacubes and proves to give a good approximation of the results that would have been obtained by median polish in the original data. }, } @TechReport{ ISE-TR-99-03, author = { Daniel Barbar{\'a} }, title = { Chaotic Mining: Knowledge Discovery Using the Fractal Dimension }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1999 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-99-03 }, abstract = { A lot of real datasets behave in a fractal fashion, i.e., they show an invariance with respect to the scale used to look at them. Fractal sets are characterized by a family of fractal dimensions, each with a particular interpretation. In this paper we show examples of how the fractal dimension(s) can be used to extract knowledge from datasets. Techniques that use the fractal dimension to detect anomalies in time series, to characterize time patterns of association rules, discover patterns in datacubes and cluster multidimensional datasets are described as part of an on-going research effort. }, } @TechReport{ ISE-TR-99-04, author = { Zoran Duric, Neil F. Johnson, Sushil Jajodia }, title = { Recovering Watermarks from Images }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1999 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-99-04 }, abstract = { Many techniques for watermarking of digital images have appeared recently. Most of these techniques are sensitive to cropping and/or affine distortions (e.g., rotations and scaling). In this paper we describe a method for recognizing images based on the concept of identification mark; the method does not require the use of the original image, it requires only a small number of salient image points. We show that, using our method, it is possible to recognize distorted images and recover their ``original'' appearance. Once the image is recognized we use a second technique based on the normal flow to fine-tune image parameters. The restored image can be used to recover the watermark that had been embedded in the image by its owner. }, } @TechReport{ ISE-TR-99-05, author = { Roger T. Alexander }, title = { Testing the Polymorphic Relationships of Object-Oriented Components }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1999 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-99-05 }, abstract = { As we move from developing procedure-oriented to object-oriented programs, the complexity traditionally found in functions and procedures is moving to the connections among components. More faults occur as components are integrated to form higher level aggregates of behavior and state. Consequently, we need to place more effort on testing the connections among components. Although object-oriented technology provides abstraction mechanisms to build components to integrate, it also adds new compositional relations that can contain faults, which must be found during integration testing. This paper describes new techniques for analyzing and testing the polymorphic relationships that occur in object-oriented software. The application of these techniques can result in an increased ability to find faults and overall higher quality software. }, } @TechReport{ ISE-TR-99-06, author = { Michelle L. Lee }, title = { Change Impact Analysis of Object-oriented Software }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1999 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-99-06 }, abstract = { As the software industry has matured, we have shifted our resources from being devoted todeveloping new software systems to making modifications in evolving software systems. A major problem for developers in an evolutionary environment is that seemingly small changes can ripple throughout the system to cause major unintended impacts elsewhere. As such, software developers need mechanisms to understand how a change to a software system will impact the rest of the system. Although the effects of changes in object-oriented software can be restricted, they are also more subtle and more difficult to detect. Maintaining the current object-oriented systems is more of an art, similar to where we were 15 years ago with procedural systems, than an engineering skill. We are beginning to see ``legacy'' object-oriented systems in industry. A difficult problem is how to maintain these objects in large, complex systems. Although objects are more easily identified and packaged, features such as encapsulation, inheritance, aggregation, polymorphism and dynamic binding can make the ripple effects of object-oriented systems far more difficult to control than in procedural systems. The research presented here addresses the problems of change impact analysis for object-oriented software. Major results of this research include a set of object-oriented data dependency graphs, a set of algorithms that allow software developers to evaluate proposed changes on object-oriented software, a set of object-oriented change impact metrics to evaluate the change impact quantitatively, and a prototype tool, ChaT, to evaluate the algorithms. This research also results in efficient regression testing by helping testers decide what classes and methods need to be retested, and in supporting cost estimation and schedule planning. }, } @TechReport{ ISE-TR-99-07, author = { Philipp Anokhin and Amihai Motro }, title = { Resolving Inconsistencies in the Multiplex Multidatabase System }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1999 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-99-07 }, abstract = { With the development of the Internet and the on-line availability of large numbers of information sources, the problem of integrating multiple heterogeneous information sources requires reexamination, its basic underlying assumptions having changed drastically. Integration methodologies must now contend with situations in which the number of potential data sources is very large, and the set of sources changes continuously. In addition, the ability to create quick, ad-hoc virtual databases for short-lived applications is now considered attractive. Under these new assumptions, a single, complete answer can no longer be guaranteed. It is now possible that a query could not be answered in its entirety, or it might result in several different answers. Multiplex is a multidatabase system designed to operate under these new assumptions. In this paper we describe how Multiplex handles queries that do not have a single, complete answer. The general approach is to define flexible and comprehensive strategies that direct the behavior of the query processing subsystem. These strategies may be defined either as part of the multidatabase design or as part of ad-hoc queries. }, } @TechReport{ ISE-TR-99-08, author = { Daniel Barbar{\'a} and Ping Chen }, title = { Using the Fractal Dimension to Cluster Datasets }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1999 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-99-08 }, abstract = { Clustering is a widely used knowledge discovery technique. It helps uncovering structures in data that were not previously known. Clustering of large datasets has received a lot of attention in recent years. However, clustering is a still a challenging task since many published algorithms fail to do well in scaling with the size of the dataset and the number of dimensions that describe the points, or in finding arbitrary shapes of clusters, or dealing effectively with the presence of noise. In this paper, we present a new clustering algorithm, based in the fractal properties of the datasets. The new algorithm, which we call Fractal Clustering (FC) places points incrementally in the cluster for which the change in the fractal dimension after adding the point is the least. This is a very natural way of clustering points, since points in the same cluster have a great degree of self-similarity among them (and much less self-similarity with respect to points in other clusters). FC requires one scan of the data, is suspendable at will, providing the best answer possible at that point, and is incremental. We show via experiments that FC effectively deals with large datasets, high-dimensionality and noise and is capable of recognizing clusters of arbitrary shape. }, } @TechReport{ ISE-TR-99-09, author = { Aynur Abdurazik and Jeff Offutt }, title = { Generating Test Cases from UML Specifications }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1999 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISE-TR-99-09 }, abstract = { Unified Modeling Language (UML) is a third generation modeling language in object-oriented software engineering. It provides constructs to specify, construct, visualize, and document artifacts of software-intensive systems. This paper presents a technique that uses Offutt's state-based specification test data generation model to generate test cases from UML statecharts. A tool TCGen has been developed to demonstrate this technique with specifications written in Software Cost Reduction (SCR) method and Unified Modeling Language (UML). Experimental results from using this tool are presented. }, } @TechReport{ ISSE-TR-98-02, author = { Jane Hayes and Jeff Offutt }, title = { Input Validation Testing: A Requirements-Driven, System Level, Early Lifecycle Technique }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1998 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-98-02 }, abstract = { Syntax-directed software accepts inputs from the user, constructed and arranged properly, that control the execution of the application. Input validation testing chooses test data that attempt to show the presence or absence of specific faults pertaining to input tolerance. A large amount of syntax-directed software currently exists and will continue to be developed that should be subjected to input validation testing. System level testing techniques that currently address this area are not well developed or formalized. There is a lack of system level testing formal research and accordingly a lack of formal, standard criteria, general purpose techniques, and tools. Systems are large (size and domain) so unit testing techniques have had limited applicability. Input validation testing techniques have not been developed or automated to assist in static input syntax evaluation and test case generation. This paper addresses the problem of statically analyzing input command syntax as defined in interface and requirements specifications and then generating test cases for input validation testing. The IVT (Input Validation Testing) technique has been developed, a proof-of-concept tool (MICASA) has been implemented, and validation has been performed. Empirical validation on actual industrial software (for the Tomahawk Cruise Missile) shows that as compared with senior, experienced testers, MICASA found more requirement specification defects, generated test cases with higher syntactic coverage, and found additional defects. Additionally, the tool performed at significantly less cost. }, } @TechReport{ ISSE-TR-98-03, author = { Daniel Barbar{\'a} and Mark Sullivan }, title = { Quasi-Cubes: A Space-Efficient Way to Support Approximate Multidimensional Databases }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1998 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-98-03 }, abstract = { A data cube is a popular organization for summary data. A cube is simply a multidimensional structure that contains at each point an aggregate value, i.e., the result of applying an aggregate function to an underlying relation. In practical situations, cubes can require a large amount of storage. The typical approach to reducing storage cost is to materialize parts of the cube on demand. Unfortunately, this lazy evaluation can be a time-consuming operation. In this paper, we propose an approximation technique that reduces the storage cost of the cube without incurring the run time cost of lazy evaluation. The idea is to characterize regions of the cube by using statistical models whose description take less space than the data itself. Then, the model parameters can be used to estimate the cube cells with a certain level of accuracy. To increase the accuracy, some of the ``outliers,'' i.e., cells that incur in the largest errors when estimated can be retained. The storage taken by the model parameters and the retained cells, of course, should take a fraction of the space of the full cube and the estimation procedure should be faster than computing the data from the underlying relations. We study three methods for modeling cube regions: linear regression, singular value decomposition and the independence approach. We study the tradeoff between the amount of storage used for the description and the accuracy of the estimation. Experiments show that the errors introduced by the estimation are small even when the description is substantially smaller than the full cube. Since cubes are used to support data analysis and analysts are rarely interested in the precise values of the aggregates (but rather in trends), providing approximate answers is, in most cases, a satisfactory compromise. Our technique allows systems to handle very large cubes that cannot either be fully stored or efficiently materialized on demand. Even for applications that can only accept tiny error in the cube data, our techniques can be used to present successive on-line refinements of the answer to a query so that a coarse picture of the answer can be presented while the remainder of the answer is fetched from disk (i.e., to support on-line aggregation). }, } @TechReport{ ISSE-TR-97-01, author = { Yvo Desmedt and Sushil Jajodia }, title = { Redistributing Secret Shares to New Access Structures and Its Applications }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1997 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-97-01 }, abstract = { Proactive secret sharing deals with refreshing secret shares, i.e., redistributing the shares of a secret to the original access structure. In this paper we focus on the general problem of redistributing shares of a secret key. Shares of a secret have been distributed such that access sets specified in the access structure (e.g., t-out-of-l) can access (or use) the secret. The problem is how to redistribute the secret, without recovering it, in such a way that those specified in the new access structure will be able to recover the secret. We also adapt our scheme such that it can be used in the context of threshold cryptography and discuss its applications to secure databases. }, } @TechReport{ ISSE-TR-97-02, author = { A. Jefferson Offutt and Shaoying Liu }, title = { Generating Test Data from SOFL Specifications }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1997 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-97-02 }, abstract = { Software testing can only be formalized and quantified when a solid basis for test generation can be defined. Bases that are commonly used include the source code, control flow graphs, design representations, and specifications/requirements. Formal specifications represent a significant opportunity for testing because they precisely describe what functions the software is supposed to provide in a form that can be easily manipulated. In this paper, we present a new method for generating tests from formal specifications. This method is comprehensive in specification coverage, applies at several levels of abstraction, and can be highly automated. We apply our method to SOFL specifications, describe the technique, and demonstrate the application on a case study. A preliminary evaluation using a code-level coverage criterion (mutation testing), indicates that the method can result in very effective tests. }, } @TechReport{ ISSE-TR-96-01, author = { A. Jefferson Offutt and Jeffrey M. Voas }, title = { Subsumption of Condition Coverage Techniques by Mutation Testing }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1996 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-96-01 }, abstract = { Condition coverage testing is a family of testing techniques that are based on the logical flow of control through a program. The condition coverage techniques include a variety of requirements, including that each statement in the program is executed and that each branch is executed. Mutation testing is a fault-based testing technique that is widely considered to be very powerful, and that imposes requirements on testing that include, and go beyond, many other techniques. In this paper, we consider the six common condition coverage techniques, and formally show that these techniques are subsumed by mutation testing, in the sense that if mutation testing is satisfied, then the condition coverage techniques are also satisfied. The fact that condition coverage techniques are subsumed by mutation has immediate practical significance because the extensive research that has already been done for mutation can be used to support condition coverage techniques, including automated tools for performing mutation testing and generating test cases. }, } @TechReport{ ISSE-TR-96-02, author = { Li Li and A. Jefferson Offutt }, title = { Algorithms for Change Impact Analysis of Object-oriented Software }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1996 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-96-02 }, abstract = { As the software industry has matured, we have shifted from almost exclusively devoting our resources to the development of new software systems to devoting large portions of our resources to making modifications to evolving software systems. A major problem for developers in an evolutionary environment is that seemingly small changes in one place can ripple throughout the system to have major unintended impacts elsewhere, thus, software developers need mechanisms to understand what impact a change to a software system will have on the rest of the system. With object-oriented software, the impacts of changes are restricted (largely because of encapsulation), but are also more subtle and harder to determine. This paper presents algorithms to analyze potential impacts of changes to object-oriented software, taking into account encapsulation, inheritance, and polymorphism. This technique allows software developers to perform ``what if'' analysis on the impacts of proposed changes, and thereby choose the change that has the least impact on the rest of the system. The analysis also offers valuable information to regression testing, by suggesting what classes and methods need to be retested, and to project managers, who can use the results for cost estimation and schedule planning. }, } @TechReport{ ISSE-TR-96-03, author = { H. Gomaa and L. Kerschberg and V. Sugumaran and C. Bosch and I. Tavakoli }, title = { A Prototype Domain Modeling Environment for Reusable Software Architectures }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1996 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-96-03 }, abstract = { Contact authors for abstract and paper }, } @TechReport{ ISSE-TR-96-04, author = { Hassan Gomaa and Larry Kerschberg and Ghulam Ahmad Farrukh and Rajan Girish Mohan }, title = { Domain Modeling of the Spiral Process Model }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1996 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-96-04 }, abstract = { Contact authors for abstract and paper }, } @TechReport{ ISSE-TR-96-05, author = { Larry Kerschebrg and Hassan Gomaa and Rajan Girish Mohan and Ghulam Ahmad Farrukh }, title = { {PROGEN}: A Knowledge-based System for Process Model Generation, Tailoring and Reuse }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1996 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-96-05 }, abstract = { Contact authors for abstract and paper }, } @TechReport{ ISSE-TR-96-06, author = { Claudio Bettini and X. Sean Wang and Sushil Jajodia and Jia-Ling Lin }, title = { Discovering Temporal Relationships with Multiple Granularities in Time Sequences }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1996 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-96-06 }, abstract = { An important usage of time sequences is to discover temporal patterns. The discovery process usually starts with a user-specified skeleton, called an ``event structure'', which consists of a number of variables representing events and temporal constraints among these variables; and the goal of the discovery is to find temporal patterns, i.e., instantiations of the variables in the structure, which frequently appear in the time sequence. This paper introduces event structures that have temporal constraints with multiple granularities, defines the pattern discovery problem with these structures, and studies effective algorithms to solve it. The basic components of the algorithms include timed automata with granularities (TAGs) and a number of heuristics. The TAGs are for testing whether a specific temporal pattern, called a ``candidate complex event type'', appears frequently in a time sequence. Since there are often a huge number of candidate event types for a usual event structure, heuristics are presented aiming at reducing the number of candidate event types and reducing the time spent by the TAGs to test whether a candidate type does appear frequently in the sequence. These heuristics exploit the information provided by explicit and implicit temporal constraints with granularity in the given event structure. The paper also gives the results of an experiment to show the effectiveness of the heuristics on a real data set. }, } @TechReport{ ISSE-TR-96-07, author = { Alexander Brodsky and Victor E. Segal and Pavel A. Exarkhopoulo }, title = { The C3 Constraints Object-Oriented Database System }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1996 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-96-02 }, abstract = { Constraints provide a flexible and uniform way to conceptually represent diverse data capturing spatio-temporal behavior, complex modeling requirements, partial and incomplete information etc, and have been used in a wide variety of application domains. Constraint databases have recently emerged to deeply integrate data captured by constraints in databases. This paper reports on the development of the first constraint object-oriented database system, Ccube, and describes its specification, design and implementation. The Ccube system is designed to be used for both implementation and optimization of high-level constraint object-oriented query languages such as LyriC or constraint extensions of OQL, and for directly building software systems requiring extensible use of constraint database features. The Ccube data manipulation language, Constraint Comprehension Calculus, is an integration constraint calculus for extensible constraint domains within monoid comprehensions, which serve as an optimization-level language for object-oriented queries. The data model for constraint calculus is based on constraint spatio-temporal (CST) objects that may hold spatial, temporal or constraint data, conceptually represented by constraints. New CST objects are constructed, manipulated and queried by means of constraint calculus. The model for monoid comprehensions, in turn, is based on the notion of monoids, which is a generalization of collection and aggregation types to structures over which one can iterate and apply merge operator; this includes disjunctions and conjunctions of constraints. The focal point of our work is achieving the right balance between expressiveness, complexity and representation usefulness, without which the practical use of the system would not be possible. To that end, C3 constraint calculus guarantees polynomial time data complexity, and, furthermore, is tightly integrated with monoid comprehensions to allow deep global optimization. }, } @TechReport{ ISSE-TR-96-08, author = { Li Li and Jeff Offutt }, title = { Applying Logic-based Database to Impact Analysis of Object-oriented Software }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1996 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-96-08 }, abstract = { As the software industry has matured, we have shifted our resources, once devoted to developing new software systems, to making modifications in evolving software systems. A major problem for developers in an evolutionary environment is that seemingly small changes can ripple throughout the system to have major unintended impacts elsewhere. As a result, software developers need mechanisms to understand how a change to a software system will affect the rest of the system. Although the effects of changes in object-oriented are restricted, they are also more subtle and more difficult to detect. The technique presented in this paper allows software developers to perform ``what if'' analysis on the effect of proposed changes, and thereby choose the change that has the least influence on the rest of the system. The analysis also adds valuable information to regression testing, by suggesting what classes and methods need to be re-tested, and to project managers, who can use the results for cost estimation and schedule planning. This paper presents algorithms to analyze the potential impacts of changes to object-oriented software, taking into account encapsulation, inheritance, and polymorphism, and implements the algorithms using datalog, a deductive database model. Using the datalog makes the implementation of the algorithms relatively easier. It also expands the range of the impact analysis of the object-oriented system to any interesting questions that can be expressed by the datalog query. Contact authors for paper }, } @TechReport{ ISSE-TR-96-09, author = { A. Jefferson Offutt and Jeff Voas and Jeff Payne }, title = { Mutation Operators for Ada }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1996 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-96-09 }, abstract = { Mutation analysis is a method for testing software. It provides a method for assessing the adequacy of test data. This report describes the mutation operators defined for the Ada programming language. The mutation operators are categorized using syntactic criteria, in a form suitable for an implementor of a mutation-based system, or a tester wishing to understand how mutation analysis can be used to test Ada programs. Each mutation operator is carefully defined, and when appropriate, implementation notes and suggestions are provided. We include operators for all syntactic elements of Ada, including exception handling, generics, and tasking. A summary table listing all operators for Ada, and compared with C and Fortran operators is also provided. The design described here is the result of deliberations among the authors in which all aspects of the Ada language and software development in Ada were considered. These operators can also be viewed as the culmination of previous mutation operator definitions for other languages. This report is intended to serve as a manual for the Ada mutation operators. }, } @TechReport{ ISSE-TR-96-10, author = { Claudio Bettini and X. Sean Wang and Sushil Jajodia }, title = { A General Framework for Time Granularity and Its Application to Temporal Reasoning }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1996 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-96-10 }, abstract = { This paper presents a general framework to define time granularity systems. We identify the main dimensions along which different systems can be characterized, and investigate the formal relationships among granularities in these systems. The paper also introduces the notion of a network of temporal constraints with (multiple) granularities emphasizing the semantic and computational differences from constraint networks with a single granularity. Consistency of networks with multiple granularities is shown to be NP-hard in general and approximate solutions for this problem and for the ``minimal network'' problem are proposed. }, } @TechReport{ ISSE-TR-95-00, author = { Zhenyi Jin and A. Jefferson Offutt }, title = { Integration Testing Based on Software Couplings }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-00 }, abstract = { Integration testing is an important part of the testing process, but few integration testing techniques have been systematically studied or defined. This paper presents an integration testing technique based on couplings between software components. The coupling-based testing technique is described, and 12 coverage criteria are defined. The coupling-based technique was compared with the category-partition method. Results shows that the coupling-based technique detected more faults, and used fewer test cases than category-partition on a subject program. This modest result indicates that the coupling-based testing approach can benefit practitioners who are performing integration testing on software. }, } @TechReport{ ISSE-TR-95-01, author = { Fang Chen and Ravi S. Sandhu }, title = { Multilevel Relational (MLR) Data Model }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-01 }, abstract = { Many multilevel data models have been proposed, and different models have different merits. In this paper, we unify many of these merits to build a new multilevel data model as a natural extension of the traditional relational data model. We describe our new model called the Multilevel Relational (MLR) data model for multilevel relations with element-level labeling. The MLR model builds upon prior work of numerous authors in this area, and integrates ideas from a number of sources. The new {\em data-based} semantics is given to the MLR data model which combines ideas from SeaView, belief-based semantics and LDV model, and has the advantages of both eliminating ambiguity and retaining upward information flow. The model is simple, unambiguous and powerful. It has five integrity properties and five operation statements for manipulating multilevel relations. In order to support this integration, several new concepts are introduced and many old ones are redefined. A central contribution of this paper is proofs of soundness, completeness and security of the MLR data model. These proofs respectively show that any of the provided operation statements can keep database state legal (i.e., satisfying all integrity properties), every legal database state can be constructed, and the MLR data model is non-interfering. This is the first time that such properties have been formally proved for a multilevel relational data model. The expressive power of the MLR model is also discussed in this paper, and is compared with several other models. }, } @TechReport{ ISSE-TR-95-02, author = { Alisa Irvine and A. Jefferson Offutt }, title = { The Effectiveness of Category-Partition Testing of Object-Oriented Software }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-02 }, abstract = { When migrating from conventional to object-oriented programming, developers face difficult decisions in modifying their development process to best use the new technology. In particular, ensuring that the software is highly reliable in this new environment poses different challenges. Developers need understanding of effective ways to test the software. This paper presents empirical data that show that the existing technique of category-partition testing can effectively find faults in object-oriented software, and new techniques are not necessarily needed. For this study, we identified types of faults that are common to C++ software and inserted faults of these types into two C++ programs. Test cases generated using the category-partition method were used to test the programs. A fault was considered detected if it caused the program to terminate abnormally or if the output was different from the output of the original program. The results show that the combination of the category-partition method and a tool for detecting memory management faults may be effective for testing C++ programs in general. Since there is no evidence that traditional techniques are not effective, software developers may not need new testing methods when migrating to object-oriented development. }, } @TechReport{ ISSE-TR-95-03, author = { Amihai Motro }, title = { Multiplex: A Formal Model for Multidatabases and Its Implementation }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-03 }, abstract = { The integration of information from multiple databases has been an enduring subject of research for almost 20 years, and many different solutions have been attempted or proposed. Missing from this research has been a uniform framework. Usually, each solution develops its own ad-hoc framework, designed to address the particular aspects of the problem that are being attacked and the particular methodology that is being used. To address this situation, in this paper we define a formal model for multidatabases, which we call Multiplex. Mulitplex is a simple extension of the relational model, which may serve as a uniform abstraction for many previous ad-hoc solutions. Multiplex is based on formal assumptions of integrability, which distinguish between scheme and instance reconcilability among independent databases. Multiplex supports database heterogeneity, and it provides several degrees of freedom that allow it to model actual situations encountered in multidatabase applications. In addition, in situations in which a single answer is not obtainable (either because the global query is not answerable, or there are multiple candidate answers), Multiplex defines approximative answers. Finally, Multiplex provides a practical platform for implementation. A prototype of such an implementation is described briefly. }, } @TechReport{ ISSE-TR-95-04, author = { Alexander Brodsky and Amihai Motro }, title = { The Problem of Optimal Approximations of Queries Using Views and Its Applications }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-04 }, abstract = { In several recent works a common scenario is described: a relational database scheme R and a set of views V of this scheme are defined, and queries on the database scheme R must be transformed to equivalent queries on the views V. Given a query Q on R, the first problem is whether there exists a query Q_V expressed exclusively on V which is equivalent to Q, and, if it exists, how to find it. Clearly, an equivalent Q_V may not always exist. In such a case, it is important to approximate Q as best as possible with a query on V. The problems here are to define approximations, to determine whether a best approximation exists, whether it is unique, and how to find it. In this paper we formalize and answer these questions. Specifically, we show that the following problems are decidable for a conjunctive query Q and a set of conjunctive views V: (1) is there a conjunctive query Q_V on V that is equivalent to Q, and (2) is there a union Q_U of conjunctive queries on V that is equivalent to Q? Furthermore, we show that Q_V and Q_U are effectively computable if they exist. Moreover, the greatest lower bound of Q exists and is unique up to equivalence. This is done by introducing lower bound classifications, and proving their existence, finiteness and effective computability. This problem has many interesting applications, including physical database design (aimed at optimization of query processing), cooperative answering (where answers are annotated with some of their properties), and multidatabase query decomposition. We examine these applications, and we show how our work extends earlier results obtained in these applications. }, } @TechReport{ ISSE-TR-95-05, author = { Claudio Bettini and X. Sean Wang and Sushil Jajodia }, title = { Temporal Semantic Assumptions and Their Use in Database Query Evaluation }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-05 }, abstract = { Temporal data explicitly stored in a temporal database are often associated with certain semantic assumptions. Each assumption can be viewed as a way of deriving implicit information from the explicitly stored data. Rather than leaving the task of deriving (possibly infinite) implicit data to application programs, as is the case currently, it is desirable that this be handled by the database management systems. To achieve this, this paper formalizes and studies two types of semantic assumptions: point-based and interval-based. The point-based assumptions include those assumptions that use interpolation methods, while the interval-based assumptions include those that involve different temporal types (time granularities). In order to incorporate semantic assumptions into query evaluation, this paper introduces a translation procedure that converts a user query into a system query such that the answer of this system query over the explicit data is the same as that of the user query over the explicit AND the implicit data. The paper also investigates the finiteness (safety) of user queries and system queries. }, }, } @TechReport{ ISSE-TR-95-06, author = { Paul Ammann and Sushil Jajodia and Indrakshi Ray }, title = { Using Formal Methods To Reason About Semantics-Based Decompositions Of Transactions }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-06 }, abstract = { Many researchers have investigated the process of decomposing transactions into smaller pieces to increase concurrency. The research typically focuses on implementing a decomposition supplied by the database application developer, with relatively little attention to what constitutes a desirable decomposition and how the developer should obtain such a decomposition. In this paper, we argue that the decomposition process itself warrants attention. A decomposition generates a set of proof obligations that must be satisfied to show that a particular decomposition correctly models the original collection of transactions. We introduce the notion of semantic histories to formulate and prove the necessary properties. Since the decomposition impacts not only the atomicity of transactions, but isolation and consistency as well, we present a technique based on formal methods that allows these properties to be surrendered in a carefully controlled manner. Note: The text of this paper appears in VLDB '95, Proceedings of the Twenty-First International Conference on Very Large Databases, Zurich, Switzerland, September, 1995. In addition, the appendices of this report contain proofs that space constraints precluded from the VLDB paper. }, } @TechReport{ ISSE-TR-95-07, author = { Daniel A. Menasc{\'e} and Hassan Gomaa and Larry Kerschberg }, title = { A Performance Oriented Design Methodology for Large-Scale Distributed Data Intensive Information Systems }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-07 }, abstract = { The Earth Observing System (EOS) Data and Information System (EOSDIS) is perhaps one of the most important examples of large-scale, geographically distributed, and data intensive systems. Designing such systems in a way that guarantees that the resulting design will satisfy all functional and performance requirements is not a trivial task. This paper presents a performance-oriented methodology to design large-scale distributed data intensive information systems. The methodology is then applied to the design of EOSDIS Core System (ECS). Performance results, based on queuing network models of ECS are also presented. }, } SEANSEAN @TechReport{ ISSE-TR-95-08, author = { Hassan Gomaa and Larry Kerschberg and Daniel A. Menasc{\'e} }, title = { A Software Architectural Design Method for Large-Scale Distributed Data Intensive Information Systems }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-08 }, abstract = { This paper describes a Software Architectural Design Method for Large-Scale Distributed Data Intensive Information Systems. The method starts by developing a domain model of the system. The domain model is then mapped to a federated client/server software architecture, where the servers need to cooperate with each other to service client requests. To demonstrate the viability of the architecture, specific user scenarios are applied to the federated client / server software architecture using event sequence diagrams. The method is illustrated by applying it to the design of a complex software system, the Earth Observing System Data and Information System (EOSDIS) Core System. }, } @TechReport{ ISSE-TR-95-09, author = { Larry Kerschberg and Hassan Gomaa and Daniel A. Menasc{\'e} }, title = { Data and Information Architectures for Large-Scale Distributed Data Intensive Information Systems }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-09 }, abstract = { No abstract or online report available. }, } @TechReport{ ISSE-TR-95-10, author = { Jeff Offutt and Jane Hayes }, title = { A Semantic Model of Program Faults }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-10 }, abstract = { Program faults are artifacts that are widely studied, but there are many aspects of faults that we still do not understand. In addition to the simple fact that one important goal during testing is to detect faults, a full understanding of the characteristics of faults is crucial to several research areas in testing. These include fault-based testing, testability, mutation testing, and the comparative evaluation of testing strategies. In this preliminary paper, we explore the fundamental nature of faults by looking at the differences between a syntactic and semantic characterization of faults. We offer definitions of these characteristics and explore the differentiation. Specifically, we discuss the concept of ``size'' of program faults --- the measurement of size provides interesting and useful distinctions between the syntactic and semantic characterization of faults. We use the fault size observations to make several predictions about testing and present data that supports this model. We also use the model to offer explanations about several questions that have intrigued testing researchers. }, } @TechReport{ ISSE-TR-95-11, author = { Zhenyi Jin }, title = { Finding Mode Invariants in SCR Specifications }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-11 }, abstract = { This paper presents an algorithm to derive modeclass invariants from SCR specifications. Matrices are formed to help to find the invariants. A case study of a cruise control system shows that the algorithm is capable of determining the same invariants that were proved via model checking in earlier investigations. This algorithm provides a mechanical procedure that are much easier and clearer for computing invariants than computing from reachability graphs. It can be easily automated and handle big systems. It views the SCR specifications from the SCR structure's point of view and analyzes mode invariants as structural properties. It presents a new way to analyze software requirements documents and detects implicitly implied system property information that can be very useful and important in understanding, developing, and testing the software system. Related issues such as generating test cases that are independent of the design structure based on the SCR specifications are discussed to reveal possible research problems and directions in the future. }, } @TechReport{ ISSE-TR-95-12, author = { Zhenyi Jin and A. Jefferson Offutt }, title = { Integrating Testing with the Software Development Process }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-12 }, abstract = { Software testing is usually postponed to the end of the development, after coding has started, or even after it has ended. By waiting until this late in the process, testing winds up being compressed -- there are not enough resources (time and budget) remaining, problems with previous stages have been solved by taking time and dollars from testing, and there is insufficient time to adequately plan for testing. In this paper, we present an integrated approach to testing software, where testing activities begin as soon as development activities begin, and are carried out in parallel with the development activities. We present specific activities -- including planning, active testing, and development influencing activities -- that are associated with each of the traditional lifecycle phases. These activities can be carried out by the developers or separate test engineers, and can be associated with development activities within the confines of any development process. These activities allows the tester to detect and prevent throughout the software development process, leading to more reliable and higher quality software. }, } @TechReport{ ISSE-TR-95-13, author = { Elisa Bertino and Sushil Jajodia and Luigi Mancini and Indrajit Ray }, title = { Advanced Transaction Processing in Multilevel Secure File Stores }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-13 }, abstract = { The concurrency control requirements for transaction processing in a multilevel secure file system are different from those in conventional transaction processing systems; in particular, there is the need to coordinate transactions at different security levels avoiding both potential timing covert channels and the starvation of transactions at high security levels. Suppose a transaction at a low security level attempts to write a data item that is being read by a transaction at a higher security level. On the one hand, a timing covert channel arises if the transaction at the low security level is either delayed or aborted by the scheduler. On the other hand, the transaction at the high security level may be subjected to an indefinite delay if it is forced to abort repeatedly. This paper extends the two-phase locking mechanism that is suitable for conventional transaction processing systems, to multilevel secure data management systems. The scheme presented here, avoids the abort of higher level transactions, nonetheless guaranteeing serializability. The system programmer is provided with a powerful set of linguistic constructs which supports exception handling, partial rollback and forward recovery. The proper use of these constructs can prevent the indefinite delay in completion of a high level transaction, and allows the system programmer to trade off starvation with transaction isolation. }, } @TechReport{ ISSE-TR-95-14, author = { Paul Ammann and Sushil Jajodia and Indrakshi Ray }, title = { Applying Formal Methods to Semantic-Based Decomposition of Transactions }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-14 }, abstract = { In some important database applications, performance requirements are not satisfied by the traditional approach of serializability, in which transactions appear to execute atomically and in isolation on a consistent database state. Although many researchers have investigated the process of decomposing transactions into steps to increase concurrency, the focus of the research is typically on implementing a decomposition supplied by the database application developer, with relatively little attention to what constitutes a desirable decomposition and how the developer should obtain such a decomposition. In this paper, we focus on the decomposition process itself. A decomposition generates a set of proof obligations that must be satisfied to show that the decomposition correctly models the original collection of transactions. We introduce the notion of semantic histories to formulate and prove the necessary properties, and the notion of successor sets to describe efficiently the correct interleavings of steps. The successor set constraints use information about conflicts between steps so as to take full advantage of conflict serializability at the level of steps. We implement the resulting, verified decomposition in a two-phase locking environment. }, } @TechReport{ ISSE-TR-95-15, author = { Alessandro D'Atri and Amihai Motro and Laura Tarantino }, title = { ViewFinder: An Object-Oriented Browser }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1995 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-95-15 }, abstract = { ViewFinder is a graphical tool for browsing in databases that provides a flexible, yet intuitive environment for exploratory searches. The design approach has been to provide maximum functionality and generality without sacrificing simplicity. The constructs of ViewFinder's external model are essentially object-oriented: class and token objects, membership relationships between tokens and classes, generalization relationships between classes, inheritance, and so on. This external model is based on an internal model which resembles a semantic network. Such a network may be extracted from a variety of data models, including object-oriented, entity-relationship and extended relational models. This architecture gives ViewFinder a large degree of model independence. The main construct of the external model are displays of objects (either classes or tokens), called views. Commands are available for efficient traversal of the database, displaying views of any class or token. To speed up repetitive searches, views may be synchronized: the user sets up several views, linked in a tree-like structure, so that when the information displayed in the root view is modified (e.g., scrolled by the user), the contents of the other views change automatically. Additional commands are available to search, order, aggregate and select the information displayed in a view, thus providing a simple query facility. }, } @TechReport{ ISSE-TR-94-00, author = { Jeff Offutt and Ammei Lee and Gregg Rothermel and Roland Untch and Christian Zapf }, title = { An Experimental Determination of Sufficient Mutation Operators }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-00 }, abstract = { Mutation testing is a technique for unit testing software that, although powerful, is computationally expensive. The principal expense of mutation is that many variants of the test program, called mutants, must be repeatedly executed. Selective mutation is a way to reduce the cost of mutation testing by reducing the number of mutants that must be executed. This paper reports experimental results that compare selective mutation testing to standard, or non-selective, mutation testing. The results support the hypothesis that selective mutation is almost as strong as non-selective mutation; in experimental trials selective mutation provides almost the same coverage as non-selective mutation, with a four-fold or more reduction in cost. }, } @TechReport{ ISSE-TR-94-01, author = { Ravi S. Sandhu and Srinivas Ganta }, title = { On the Expressive Power of the Unary Transformation Model }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-01 }, abstract = { The Transformation Model (TRM) was recently introduced in the literature by Sandhu and Ganta. TRM is based on the concept of {\em transformation of rights}. The propagation of access rights in TRM is authorized entirely by existing rights for the object in question. It has been demonstrated in earlier work that TRM is useful for expressing various kinds of consistency, confidentiality, and integrity controls. In our previous work a special case of TRM named Binary Transformation Model (BTRM) was defined. We proved that BTRM is equivalent in expressive power to TRM. This result indicates that it suffices to allow testing for only two cells of the matrix. In this paper we study the relationship between TRM and the Unary Transformation Model (UTRM). In UTRM, individual commands are restricted to testing for only one cell of the matrix (whereas individual TRM commands can test for multiple cells of the matrix). Contrary to our initial conjecture, we found, quite surprisingly, that TRM and UTRM are formally equivalent in terms of expressive power. The implications of this result on safety analysis is also discussed in this paper. The construction used to prove the equivalence of TRM and UTRM also indicates that they might not be practically equivalent. }, } @TechReport{ ISSE-TR-94-02, author = { Bo Sanden }, title = { A Graduate Course in Object-Oriented Analysis Based on Student-Generated Projects }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-02 }, abstract = { This paper describes the experience from several offerings of a Software Requirements course based on the Object Modeling Technique by Rumbaugh et al. The paper describes the organization of the course, which includes a semester project carried out by groups of 3-4 students. Descriptions of 20 such projects are given, most of which were based on ideas from the student teams. }, } @TechReport{ ISSE-TR-94-03, author = { Jeffrey R. Carter and Bo I. Sanden }, title = { Entity-Life Modeling and Ada 9X }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-03 }, abstract = { Entity-life modeling (ELM) is a design approach for reactive systems. ELM designs are usually implemented in Ada and rely on its packaging and tasking facilities. An ELM solution of a Flexible Manufacturing System (FMS) has been published with an implementation in Ada 83. Ada 83 has a number of shortcomings for reactive systems, many of which are addressed by the proposed revision, Ada 9X. This paper examines the implementation of the FMS in Ada 9X and compares it to the earlier implementation. Although the FMS problem was conceived independently from the Ada 9X effort, the Ada 83 implementation is virtually a catalog of problems that are elegantly addressed by Ada 9X. This indicates the importance of the 9X revision to make Ada useful in reactive, concurrent applications. }, } @TechReport{ ISSE-TR-94-04, author = { Bo Sanden and Anhtuan Dinh }, title = { Object-Oriented Analysis for Control Systems with Shared Resources }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-04 }, abstract = { This paper proposes a modification of existing approaches to object-oriented analysis (particularly the Object Modeling Technique, OMT) to address the problem of resource sharing in control systems. In control- system analysis, it is important to identify certain objects as the resource users and other as the (shared) resources. Once this has been done, the system can often be made deadlock free simply by ensuring that resource users always acquire shared resources in the same order. Resource users can easily be high-lighted in the object model, while events and actions associated with resource allocation can be shown in the dynamic model. A flexible manufacturing system (FMS) is given as a non-trivial example. In that example, jobs need simultaneous exclusive access to various devices. }, } @TechReport{ ISSE-TR-94-05, author = { Jeff Offutt and Jie Pan and Kanupriya Tewary and Tong Zhang }, title = { Experiments with Data Flow and Mutation Testing }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-05 }, abstract = { This paper presents two experimental comparisons of data flow and mutation testing. These two techniques are widely considered to be effective for unit-level software testing, but can only be analytically compared to a limited extent. We compare the techniques by evaluating the effectiveness of test data developed for each. For a number of programs, we develop ten independent sets of test data; five to satisfy the mutation criterion, and five to satisfy the all-uses data flow criterion. These test sets are developed using automated tools, in a manner consistent with the way a test engineer would apply these testing techniques. We use these test sets in two separate experiments. First we apply a ``cross scoring'', by measuring the effectiveness of the test data that was developed for one technique in terms of the other technique. Second, we investigate the ability of the test sets to find faults. We place a number of faults into each of our subject programs, and measure the number of faults that are detected by the test sets. Our results indicate that while both techniques are effective, mutation-adequate test sets are closer to satisfying data flow, and detect more faults. }, } @TechReport{ ISSE-TR-94-06, author = { Paul Ammann }, title = { A Safety Kernel for Traffic Light Control }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-06 }, abstract = { The success of kernels for enforcing security has led to proposals to use kernels for enforcing safety. As a feasibility demonstration of one such proposal, we describe a kernel for traffic light control. We argue the utility of the kernel and state the safety properties the kernel must ensure. We give a formal specification of the kernel and show that the specification maintains the safety properties. We explain an implementation of the kernel in Ada and discuss use of the kernel. }, } @TechReport{ ISSE-TR-94-07, author = { X. Sean Wang }, title = { Algebraic Query Languages on Temporal Databases with Multiple Time Granularities }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-07 }, abstract = { This paper investigates algebraic query languages on temporal databases. The data model used is a multidimensional extension of the temporal modules introduced in [1]. In a multidimensional temporal module, every non-temporal fact has a timestamp that is a set of n-ary tuples of time points. A temporal module has a set of timestamped facts and has an associated temporal granularity (or temporal type), and a temporal database is a set of multidimensional temporal modules with possibly different temporal types. Temporal algebras are proposed on this database model. Example queries and results of the paper show that the algebras are rather expressive. The operations of the algebras are organized into two groups: snapshot-wise operations and timestamp operations. Snapshot-wise operations are extensions of the traditional relational algebra operations, while timestamp operations are extensions of first-order mappings from timestamps to timestamps. Multiple temporal types are only dealt with by these timestamp operations. Hierarchies of algebras are defined in terms of the dimensions of the temporal modules in the intermediate results. The symbol \(Talg_k^{m,n}\) is used to denote all the algebra queries whose input, output and intermediate modules are of dimensions at most m, n and k, respectively. (Most temporal algebras proposed in the literature are in \(Talg_1^{1,1}\).) Equivalent hierarchies \(Tcalc_k^{m,n}\) are defined in a calculus query language that is formulated by using a first-order logic with linear order. The addition of aggregation functions into the algebras is also studied.}, } @TechReport{ ISSE-TR-94-08, author = { F. G. Patterson Jr. and Jean-Jacques Moortgat }, title = { An Application of Entity Life Modeling to Incremental Re-engineering of Fortran Reactive Program Components }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-08 }, abstract = { This paper describes a case study in which the Entity Life Modeling (ELM) technique is used to reconstruct a single module from a legacy FORTRAN application. The architecture of the module changed as a result of entity identification and mode analysis. Concurrent aspects of the problem domain were identified and modeled as concurrent tasks. To interface with the greater part of the software system that was not reconstructed, an interface object was created. The interface object may be regarded as scaffolding that will be discarded when neighboring increments of the system are reengineered. The reengineered increment has a highly maintainable design that is characteristic of complete systems constructed using the ELM methodology. }, } @TechReport{ ISSE-TR-94-09, author = { Jeff Offutt and Jie Pan }, title = { Using Constraints to Detect Equivalent Mutantsr }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-09 }, abstract = { Mutation testing is a software testing technique that is considered to be very powerful, but is very expensive to apply. One open problem in mutation testing is how to automatically detect equivalent mutant programs. Currently, equivalent mutants are detected by hand, which makes it a very expensive and time-consuming process, and restricts the use of mutation testing. This paper presents a technique that uses mathematical constraints to automatically detect equivalent mutants. A tool Equivalencer has been developed to demonstrate this technique, and experimental results from using this tool are presented. }, } @TechReport{ ISSE-TR-94-10, author = { Jeff Offutt and Zhenyi Jin and Jie Pan }, title = { The Dynamic-domain Reduction Approach to Test Data Generation: Design and Algorithms }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-10 }, abstract = { Although there are many techniques and tools available to support the software testing process, one of the most crucial parts of testing, generating the test data, is usually done by hand. Test data generation is one of the most technically challenging steps in testing software, but unfortunately, most practical systems do not incorporate any automation in this step. This paper presents a new method for automatically generating test data that incorporates ideas from symbolic evaluation, constraint-based testing, and dynamic test data generation. It takes an initial set of values for each input, and ``pushes'' the values through the control-flow graph of the program in a dynamic way, modifying the sets of values as branches in the program are taken. This method is an outgrowth of previous research in constraint-based testing, and combines several of the steps that were previously separate into one coherent process. The major difference is that this method is dynamic, and resolves path constraints immediately; it also includes an intelligent search technique, and improved handling of arrays, loops, and pointers. }, } @TechReport{ ISSE-TR-94-11, author = { X. Sean Wang and Claudio Bettini and Alexander Brodsky and Sushil Jajodia }, title = { Logic Design for Temporals Database with Multiple Temporal Types }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-11 }, abstract = { The purpose of good database logical design is to eliminate data redundancy and insertion and deletion anomalies. In order to achieve this objective for temporal databases, the notions of temporal types and temporal functional dependencies (TFDs) are introduced. A temporal type is a monotonic mapping from ticks of time (represented by positive integers) to time sets (represented by subsets of reals) and is used to capture various standard and user-defined calendars. A TFD is a proper extension of the traditional functional dependency and takes the form X-u->Y, meaning that there is a unique value for Y during one tick of the temporal type u for one particular X value. An axiomatization for TFDs is given. Since a finite set of TFDs usually implies an infinite number of TFDs, we introduce the notion of and give an axiomatization for a finite closure to effectively capture a finite set of implied TFDs that are essential to the logical design. Temporal normalization procedures with respect to TFDs are then given. More specifically, temporal Boyce-Codd normal form (TBCNF) that eliminates all data redundancies, and temporal third normal form (T3NF) that allows dependency preservation, are defined. Both normal forms are proper extensions of their traditional counterparts, BCNF and 3NF. Decomposition algorithms are presented that give lossless TBCNF decompositions and lossless, dependency preserving, T3NF decompositions. }, } @TechReport{ ISSE-TR-94-12, author = { Alexander Brodsky and Yoram Kornatzky }, title = { The LyriC Language: Querying Constraint Objects }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-12 }, abstract = { Presented in this paper is a novel language for querying object-oriented databases where objects may hold spatial, temporal or constraint data, conceptually represented by equality and inequality constraints. The proposed LyriC language is motivated by application realms such as (1) constraint-based design in two-, three-, or higher-dimensional space, (2) large-scale optimization and analysis, based mostly on linear programming techniques, and (3) spatial and geographic databases. LyriC is an extension of flat constraint query languages, and especially those for linear constraint databases, to complex objects. The extension is based on the object-oriented paradigm, where a database is a collection of objects some of whose attributes might be either described explicitly or implicitly by constraints. Constraints are organized in classes with associated methods. The query language is an extension of the language XSQL, suggested by Kifer, Kim and Sagiv, and is built around the idea of extended path expressions. Path expressions in a query traverse nested structures in one sweep. Constraints are used in a query to filter stored constraints and to create new constraint objects. }, } @TechReport{ ISSE-TR-94-13, author = { Jong Pil Yoon and Larry Kerschberg }, title = { Semantic Update Optimization in Active Databases }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-13 }, abstract = { In an active database, an update may be constrained by integrity constraints, and may also trigger rules that, in turn, may affect the database state. The general problem is to effect the update while also managing the ``side-effects'' of constraint enforcement and rule execution. In this paper an update calculus is proposed by which updates, constraints and rules are specified and managed within the same formalism. Constraints and production rules are expressed in a constraint language based on first-order logic. These logic expressions are used to semantically transform an original update into a sequence of updates that reflect the relevant constraints and production rules. The inference mechanism associated with processing a reformulated query ensures that: 1) the pre- and post-conditions of an update are satisfied, 2) update side-effects are propagated, and 3) repairs are made to tuples exhibiting constraint violations. Thus, a user-specified ``update'' is transformed, through semantic reformulation techniques, into a sequence of updates which together ensure semantic integrity of the original update as well as its propagated side-effects. This research presents several contributions. Integrity constraints and production rules are expressed in a declarative formalism so that they may be reasoned about. The update calculus formalism handles the semantic reformulation of an update to reflect relevant constraints and rules governing it. Finally, an algorithm is presented to handle constraint enforcement, production rule firing, and subsequent repair actions. }, } @TechReport{ ISSE-TR-94-14, author = { Bo Sanden }, title = { A Restrictive Definition of Concurrency for Discrete-event Modeling }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1994 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-94-14 }, abstract = { Concurrency is an increasingly popular modeling concept in object-oriented analysis, software design, process modeling and other areas. Many approaches view their problem domains in terms of independent, concurrent processes that occasionally need to be synchronized. This is potentially an extremely concurrent model, where ``essential'' processes that represent independent progress may be lost in the wealth of more trivial processes. This is particularly important in a software implementation, where various overhead costs are incurred by each process. }, } @TechReport{ ISSE-TR-93-00, author = { Paul Ammann and Jeff Offutt }, title = { Functional and Test Specifications for the MiStix File System }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1993 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-93-00 }, abstract = { Mistix is a simple file system based on the Unix file system. Mistix is used in classroom exercises in graduate software engineering classes at George Mason University. In this document, we supply several different specifications for Mistix. First we give an informal specification. Next, since the informal specification turns out to be difficult to formalize directly, we supply a revised informal specification that matches subsequent formal specifications. We give a model-based specification for Mistix in Z, followed by functional test specifications based on the Category-Partition method. Finally, we give an algebraic specification for Mistix. }, } @TechReport{ ISSE-TR-93-01, author = { Jeff Offutt and Kanupriya Tewary }, title = { Empirical Comparisons of Data Flow and Mutation Testing }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1993 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-93-01 }, abstract = { Data flow and mutation testing are two powerful white box testing techniques for unit-level software testing. Unfortunately, they cannot be completely compared on an analytical basis, for example, mutation is incomparable on an inclusion basis with most data flow criteria. This paper shows that mutation includes the All-defs data flow criterion, but is incomparable with other data flow criteria, and presents results from two empirical comparisons of mutation with the All-uses data flow criterion. For the first comparison, we define a coverage measure that is used for the comparison. The coverage of data flow by mutation is between 99% and 100%, which means that test data that satisfied the mutation criterion also satisfied the data flow criterion in almost all cases. The coverage of mutation by data flow was between 80 and 90%. For the second comparison, we use test data that satisfy the two criteria to detect faults, and compare the criteria on the basis of the faults found. The empirical evidence strongly indicates that while data flow-adequate test sets are close to being mutation-adequate, mutation-adequate test sets are almost invariably data flow-adequate. }, } @TechReport{ ISSE-TR-93-02, author = { Paul Ammann and Sushil Jajodia }, title = { Planar Lattice Security Structures for Multi-level Replicated Databases }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1993 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-93-02 }, abstract = { We review three algorithms for concurrency control in secure, multi-level replicated databases. We show by counterexample that the various algorithms, even if suitably modified to correct published problems, still do not guarantee serializability for either general partial orders or general lattices. The counterexamples suggest that the difficulty is a general feature of concurrency control algorithms tailored to the replicated architecture. We make two constructive observations. First, sufficient centralization permits algorithms applicable to general partial orders. Second, the reviewed algorithms are applicable to more restrictive security structures. Specifically, we observe that the intuitively appealing restriction to planar lattices is sufficient. }, } @TechReport{ ISSE-TR-93-03, author = { Ravi Sandhu and Srinivas Ganta }, title = { Recognition and Approximation of NTrees }, institution = { Department of Information and Software Engineering, George Mason University }, address = { 4400 University Drive MS 4A5, Fairfax, VA 22030-4444 USA }, year = { 1993 }, howpublished = { Available at http://cs.gmu.edu }, number = { ISSE-TR-93-03 }, abstract = { The ntree hierarchy for protection groups was recently introduced in the literature and shown to have an efficient representation by assigning a pair of lr-values to each group so that U is a proper subgroup of V if only if l[U]