Merge "X86: Add missing functions for libclcore files"
diff --git a/api/GenerateHeaderFiles.cpp b/api/GenerateHeaderFiles.cpp
index aac7ecb..9d5b49a 100644
--- a/api/GenerateHeaderFiles.cpp
+++ b/api/GenerateHeaderFiles.cpp
@@ -42,7 +42,7 @@
* add a check on a flag that can be set for internal builds. This enables us to keep supporting
* old APIs in the runtime code.
*/
-static void writeVersionGuardStart(GeneratedFile* file, VersionInfo info, int finalVersion) {
+static void writeVersionGuardStart(GeneratedFile* file, VersionInfo info, unsigned int finalVersion) {
if (info.intSize == 32) {
*file << "#ifndef __LP64__\n";
} else if (info.intSize == 64) {
@@ -218,7 +218,7 @@
*file << "void";
}
- *file << makeAttributeTag(spec.getAttribute(), "overloadable",
+ *file << makeAttributeTag(spec.getAttribute(), spec.isOverloadable() ? "overloadable" : "",
function->getDeprecatedApiLevel(), function->getDeprecatedMessage());
*file << "\n";
@@ -364,6 +364,10 @@
set<Function*> documentedFunctions;
for (auto spec : specFile.getFunctionSpecifications()) {
+ // Do not include internal APIs in the header files.
+ if (spec->isInternal()) {
+ continue;
+ }
Function* function = spec->getFunction();
if (documentedFunctions.find(function) == documentedFunctions.end()) {
documentedFunctions.insert(function);
diff --git a/api/GenerateStubsWhiteList.cpp b/api/GenerateStubsWhiteList.cpp
index 9b4297d..69afdbf 100644
--- a/api/GenerateStubsWhiteList.cpp
+++ b/api/GenerateStubsWhiteList.cpp
@@ -15,6 +15,7 @@
*/
#include <algorithm>
+#include <climits>
#include <iostream>
#include <iterator>
#include <sstream>
@@ -25,8 +26,8 @@
using namespace std;
-const int kMinimumApiLevelForTests = 11;
-const int kApiLevelWithFirst64Bit = 21;
+const unsigned int kMinimumApiLevelForTests = 11;
+const unsigned int kApiLevelWithFirst64Bit = 21;
// Used to map the built-in types to their mangled representations
struct BuiltInMangling {
@@ -58,7 +59,7 @@
* substitution for the provided type name, as would be done (mostly) by a
* preprocessor. Returns empty string if there's no substitution.
*/
-static string findSubstitute(const string& typeName, int apiLevel, int intSize) {
+static string findSubstitute(const string& typeName, unsigned int apiLevel, int intSize) {
const auto& types = systemSpecification.getTypes();
const auto type = types.find(typeName);
if (type != types.end()) {
@@ -92,7 +93,7 @@
* the resulting list. 'apiLevel' and 'intSize' specifies the API level and bitness
* we are currently processing.
*/
-list<string> expandTypedefs(const string type, int apiLevel, int intSize) {
+list<string> expandTypedefs(const string type, unsigned int apiLevel, int intSize) {
// Split the string in tokens.
istringstream stream(type);
list<string> tokens{istream_iterator<string>{stream}, istream_iterator<string>{}};
@@ -268,7 +269,7 @@
// Write to the stream the mangled representation of each parameter.
static bool writeParameters(ostringstream* stream, const std::vector<ParameterDefinition*>& params,
- int apiLevel, int intSize) {
+ unsigned int apiLevel, int intSize) {
if (params.empty()) {
*stream << "v";
return true;
@@ -298,7 +299,7 @@
*/
static bool addFunctionManglingToSet(const Function& function,
const FunctionPermutation& permutation, bool overloadable,
- int apiLevel, int intSize, set<string>* allManglings) {
+ unsigned int apiLevel, int intSize, set<string>* allManglings) {
const string& functionName = permutation.getName();
string mangling;
if (overloadable) {
@@ -322,22 +323,25 @@
* of API levels covered.
*/
static bool addManglingsForSpecification(const Function& function,
- const FunctionSpecification& spec, int lastApiLevel,
+ const FunctionSpecification& spec, unsigned int lastApiLevel,
set<string>* allManglings) {
// If the function is inlined, we won't generate an unresolved external for that.
if (spec.hasInline()) {
return true;
}
const VersionInfo info = spec.getVersionInfo();
- const int minApiLevel = info.minVersion ? info.minVersion : kMinimumApiLevelForTests;
- const int maxApiLevel = info.maxVersion ? info.maxVersion : lastApiLevel;
+ unsigned int minApiLevel, maxApiLevel;
+ minApiLevel = info.minVersion ? info.minVersion : kMinimumApiLevelForTests;
+ maxApiLevel = info.maxVersion ? info.maxVersion : lastApiLevel;
const bool overloadable = spec.isOverloadable();
/* We track success rather than aborting early in case of failure so that we
* generate all the error messages.
*/
bool success = true;
- for (int apiLevel = minApiLevel; apiLevel <= maxApiLevel; ++apiLevel) {
+ // Use 64-bit integer here for the loop count to avoid overflow
+ // (minApiLevel == maxApiLevel == UINT_MAX for unreleased API)
+ for (int64_t apiLevel = minApiLevel; apiLevel <= maxApiLevel; ++apiLevel) {
for (auto permutation : spec.getPermutations()) {
if (info.intSize == 0 || info.intSize == 32) {
if (!addFunctionManglingToSet(function, *permutation, overloadable, apiLevel, 32,
@@ -360,13 +364,17 @@
* to validate unresolved external references. 'lastApiLevel' is the largest api level found in
* all spec files.
*/
-static bool generateWhiteListFile(int lastApiLevel) {
+static bool generateWhiteListFile(unsigned int lastApiLevel) {
bool success = true;
// We generate all the manglings in a set to remove duplicates and to order them.
set<string> allManglings;
for (auto f : systemSpecification.getFunctions()) {
const Function* function = f.second;
for (auto spec : function->getSpecifications()) {
+ // Compiler intrinsics are not runtime APIs. Do not include them in the whitelist.
+ if (spec->isIntrinsic()) {
+ continue;
+ }
if (!addManglingsForSpecification(*function, *spec, lastApiLevel, &allManglings)) {
success = false; // We continue so we can generate all errors.
}
@@ -444,7 +452,7 @@
* This file can be used to verify the white list that's also generated in this file. To do so,
* run "llvm-nm -undefined-only -just-symbol-name" on the resulting bit code.
*/
-static bool generateApiTesterFile(const string& slangTestDirectory, int apiLevel) {
+static bool generateApiTesterFile(const string& slangTestDirectory, unsigned int apiLevel) {
GeneratedFile file;
if (!file.start(slangTestDirectory, "all" + to_string(apiLevel) + ".rs")) {
return false;
@@ -470,6 +478,10 @@
for (auto f : systemSpecification.getFunctions()) {
const Function* function = f.second;
for (auto spec : function->getSpecifications()) {
+ // Do not include internal APIs in the API tests.
+ if (spec->isInternal()) {
+ continue;
+ }
VersionInfo info = spec->getVersionInfo();
if (!info.includesVersion(apiLevel)) {
continue;
@@ -503,13 +515,13 @@
return true;
}
-bool generateStubsWhiteList(const string& slangTestDirectory, int maxApiLevel) {
- int lastApiLevel = min(systemSpecification.getMaximumApiLevel(), maxApiLevel);
+bool generateStubsWhiteList(const string& slangTestDirectory, unsigned int maxApiLevel) {
+ unsigned int lastApiLevel = min(systemSpecification.getMaximumApiLevel(), maxApiLevel);
if (!generateWhiteListFile(lastApiLevel)) {
return false;
}
// Generate a test file for each apiLevel.
- for (int i = kMinimumApiLevelForTests; i <= lastApiLevel; ++i) {
+ for (unsigned int i = kMinimumApiLevelForTests; i <= lastApiLevel; ++i) {
if (!generateApiTesterFile(slangTestDirectory, i)) {
return false;
}
diff --git a/api/GenerateTestFiles.cpp b/api/GenerateTestFiles.cpp
index ddb7c78..45cf1f3 100644
--- a/api/GenerateTestFiles.cpp
+++ b/api/GenerateTestFiles.cpp
@@ -46,7 +46,7 @@
}
// Returns true if any permutation of the function have tests to b
-static bool needTestFiles(const Function& function, int versionOfTestFiles) {
+static bool needTestFiles(const Function& function, unsigned int versionOfTestFiles) {
for (auto spec : function.getSpecifications()) {
if (spec->hasTests(versionOfTestFiles)) {
return true;
@@ -974,7 +974,7 @@
* to test.
*/
static bool writeTestFilesForFunction(const Function& function, const string& directory,
- int versionOfTestFiles) {
+ unsigned int versionOfTestFiles) {
// Avoid creating empty files if we're not testing this function.
if (!needTestFiles(function, versionOfTestFiles)) {
return true;
@@ -1026,7 +1026,7 @@
return true;
}
-bool generateTestFiles(const string& directory, int versionOfTestFiles) {
+bool generateTestFiles(const string& directory, unsigned int versionOfTestFiles) {
bool success = true;
for (auto f : systemSpecification.getFunctions()) {
if (!writeTestFilesForFunction(*f.second, directory, versionOfTestFiles)) {
diff --git a/api/Generator.cpp b/api/Generator.cpp
index c44c995..456f214 100644
--- a/api/Generator.cpp
+++ b/api/Generator.cpp
@@ -59,7 +59,7 @@
* Constants are defined as follows:
*
* constant: {The name of the constant.}
- * [version: {Starting API level} [ {Last API level that supports this.}]
+ * [version: ({Starting API level} [ {Last API level that supports this.}] | UNRELEASED)]
* [size: {32 or 64. Used if this is available only for 32 or 64 bit code.}]
* value: {The value of the constant.}
* [hidden:] ...If present, don't document the constant. Omit the following two fields.
@@ -73,7 +73,7 @@
* Types can either be simple types, structs, or enums. They have the format:
*
* type: {The typedef name of the type.}
- * [version: {Starting API level} [ {Last API level that supports this.}]
+ * [version: ({Starting API level} [ {Last API level that supports this.}] | UNRELEASED)]
* [size: {32 or 64. Used if this is available only for 32 or 64 bit code.}]
* simple: {The C declaration that this type is the typedef equivalent.}
* [hidden:] ...If present, don't document the type. Omit the following two fields.
@@ -85,7 +85,7 @@
* end:
*
* type: {The typedef name of the type.}
- * [version: {Starting API level} [ {Last API level that supports this.}]
+ * [version: ({Starting API level} [ {Last API level that supports this.}] | UNRELEASED)]
* [size: {32 or 64. Used if this is available only for 32 or 64 bit code.}]
* struct: [{The name that will appear right after the struct keyword}]
* field: {Type and name of the field}[, "{One line documentation of the field}"]
@@ -99,7 +99,7 @@
* end:
*
* type: {The typedef name of the type.}
- * [version: {Starting API level} [ {Last API level that supports this.}]
+ * [version: ({Starting API level} [ {Last API level that supports this.}] | UNRELEASED)]
* [size: {32 or 64. Used if this is available only for 32 or 64 bit code.}]
* enum: [{The name that will appear right after the enum keyword}]
* value: {Type and name of the field}[, "{One line documentation of the field}"]
@@ -114,7 +114,7 @@
* Functions have the following format:
*
* function: {The name of the function.}
- * [version: {Starting API level} [ {Last API level that supports this.}]
+ * [version: ({Starting API level} [ {Last API level that supports this.}] | UNRELEASED)]
* [size: {32 or 64. Used if this is available only for 32 or 64 bit code.}]
* [attrib: {Attributes of the function.}]
* [w: {A comma separated list of width supported. Only 1, 2, 3, 4 are supported.
@@ -122,7 +122,7 @@
* ... Up to four w: or t: can be defined. The order matter. These will be replace
* ... the #1, #2, #3, #4 that can be found in the rest of the specification.
* ret: [{The return type} [, "{One line documentation of the return}"]]
- * [arg: {Type}[, {Name}][, {ParameterEntry.testOption}][, "{One line documentation of the field}"]]
+ * [arg: ({Type} {Name} | {Ellipsis})[, {ParameterEntry.testOption}][, "{One line documentation of the field}"]]
* [arg: ... Same for all the other arguments of the function.]
* [hidden:] ... If present, don't include in the HTML documentation.
* [deprecated: [{Deprecation message.}] ... This is deprecated. Compiler will issue a wrning.
@@ -153,7 +153,7 @@
using namespace std;
-static bool parseCommandLine(int argc, char* argv[], int* maxApiLevel, bool* forVerification,
+static bool parseCommandLine(int argc, char* argv[], unsigned int* maxApiLevel, bool* forVerification,
vector<string>* specFileNames) {
for (int i = 1; i < argc; i++) {
if (argv[i][0] == '-') {
@@ -189,7 +189,7 @@
int main(int argc, char* argv[]) {
// If there's no restriction, generated test files for the very highest version.
- int maxApiLevel = 999999;
+ unsigned int maxApiLevel = VersionInfo::kUnreleasedVersion;
vector<string> specFileNames;
bool forVerification = false;
if (!parseCommandLine(argc, argv, &maxApiLevel, &forVerification, &specFileNames)) {
diff --git a/api/Generator.h b/api/Generator.h
index 7a9dd28..5d72101 100644
--- a/api/Generator.h
+++ b/api/Generator.h
@@ -21,7 +21,7 @@
bool generateHeaderFiles(const std::string& directory);
// Generates the Java and RenderScript test files. The implementation is in GenerateTestFiles.cpp.
-bool generateTestFiles(const std::string& directory, int versionOfTestFiles);
+bool generateTestFiles(const std::string& directory, unsigned int versionOfTestFiles);
/* Generates the documentation files. The implementation is in GenerateDocumentation.cpp.
* If forVerification is false (the default), we generate the .jd files needed by the
@@ -33,6 +33,6 @@
* when testing slang and that can be used to manually verify the white list.
* The implementation is in GenerateStubsWhiteList.cpp.
*/
-bool generateStubsWhiteList(const std::string& slangTestDirectory, int maxApiLevel);
+bool generateStubsWhiteList(const std::string& slangTestDirectory, unsigned int maxApiLevel);
#endif // ANDROID_RS_API_GENERATOR_GENERATOR_H
diff --git a/api/Scanner.cpp b/api/Scanner.cpp
index 84af581..1bd4973 100644
--- a/api/Scanner.cpp
+++ b/api/Scanner.cpp
@@ -175,7 +175,14 @@
if (!isReturn) {
size_t nameStart = s.rfind(' ');
if (nameStart == string::npos) {
- error() << "Missing variable name\n";
+ if (s == "...") {
+ p->name = s;
+ p->type = "";
+ p->lineNumber = mLineNumber;
+ return p;
+ } else {
+ error() << "Missing variable name\n";
+ }
} else {
p->name = s.substr(nameStart + 1);
s.erase(nameStart);
diff --git a/api/Specification.cpp b/api/Specification.cpp
index f02e429..28e5231 100644
--- a/api/Specification.cpp
+++ b/api/Specification.cpp
@@ -32,7 +32,7 @@
using namespace std;
// API level when RenderScript was added.
-const int MIN_API_LEVEL = 9;
+const unsigned int MIN_API_LEVEL = 9;
const NumericalType TYPES[] = {
{"f16", "FLOAT_16", "half", "float", FLOATING_POINT, 11, 5},
@@ -50,6 +50,8 @@
const int NUM_TYPES = sizeof(TYPES) / sizeof(TYPES[0]);
+static const char kTagUnreleased[] = "UNRELEASED";
+
// The singleton of the collected information of all the spec files.
SystemSpecification systemSpecification;
@@ -201,26 +203,34 @@
}
}
-bool VersionInfo::scan(Scanner* scanner, int maxApiLevel) {
+bool VersionInfo::scan(Scanner* scanner, unsigned int maxApiLevel) {
if (scanner->findOptionalTag("version:")) {
const string s = scanner->getValue();
- sscanf(s.c_str(), "%i %i", &minVersion, &maxVersion);
- if (minVersion && minVersion < MIN_API_LEVEL) {
- scanner->error() << "Minimum version must >= 9\n";
- }
- if (minVersion == MIN_API_LEVEL) {
- minVersion = 0;
- }
- if (maxVersion && maxVersion < MIN_API_LEVEL) {
- scanner->error() << "Maximum version must >= 9\n";
+ if (s == kTagUnreleased) {
+ // Exact match on the tag: the API is still under development
+ // and does not have an official version number.
+ minVersion = maxVersion = kUnreleasedVersion;
+ } else {
+ sscanf(s.c_str(), "%u %u", &minVersion, &maxVersion);
+ if (minVersion && minVersion < MIN_API_LEVEL) {
+ scanner->error() << "Minimum version must >= 9\n";
+ }
+ if (minVersion == MIN_API_LEVEL) {
+ minVersion = 0;
+ }
+ if (maxVersion && maxVersion < MIN_API_LEVEL) {
+ scanner->error() << "Maximum version must >= 9\n";
+ }
}
}
if (scanner->findOptionalTag("size:")) {
sscanf(scanner->getValue().c_str(), "%i", &intSize);
}
+
if (maxVersion > maxApiLevel) {
maxVersion = maxApiLevel;
}
+
return minVersion == 0 || minVersion <= maxApiLevel;
}
@@ -331,7 +341,7 @@
}
void ConstantSpecification::scanConstantSpecification(Scanner* scanner, SpecFile* specFile,
- int maxApiLevel) {
+ unsigned int maxApiLevel) {
string name = scanner->getValue();
VersionInfo info;
if (!info.scan(scanner, maxApiLevel)) {
@@ -357,7 +367,7 @@
}
void TypeSpecification::scanTypeSpecification(Scanner* scanner, SpecFile* specFile,
- int maxApiLevel) {
+ unsigned int maxApiLevel) {
string name = scanner->getValue();
VersionInfo info;
if (!info.scan(scanner, maxApiLevel)) {
@@ -522,7 +532,7 @@
}
}
-bool FunctionSpecification::hasTests(int versionOfTestFiles) const {
+bool FunctionSpecification::hasTests(unsigned int versionOfTestFiles) const {
if (mVersionInfo.maxVersion != 0 && mVersionInfo.maxVersion < versionOfTestFiles) {
return false;
}
@@ -533,7 +543,7 @@
}
void FunctionSpecification::scanFunctionSpecification(Scanner* scanner, SpecFile* specFile,
- int maxApiLevel) {
+ unsigned int maxApiLevel) {
// Some functions like convert have # part of the name. Truncate at that point.
const string& unexpandedName = scanner->getValue();
string name = unexpandedName;
@@ -562,6 +572,12 @@
spec->mTest = "scalar"; // default
spec->mVersionInfo = info;
+ if (scanner->findOptionalTag("internal:")) {
+ spec->mInternal = (scanner->getValue() == "true");
+ }
+ if (scanner->findOptionalTag("intrinsic:")) {
+ spec->mIntrinsic = (scanner->getValue() == "true");
+ }
if (scanner->findOptionalTag("attrib:")) {
spec->mAttribute = scanner->getValue();
}
@@ -711,7 +727,7 @@
}
// Read the specification, adding the definitions to the global functions map.
-bool SpecFile::readSpecFile(int maxApiLevel) {
+bool SpecFile::readSpecFile(unsigned int maxApiLevel) {
FILE* specFile = fopen(mSpecFileName.c_str(), "rt");
if (!specFile) {
cerr << "Error opening input file: " << mSpecFileName << "\n";
@@ -804,7 +820,7 @@
return findOrCreate<Function>(name, &mFunctions, created);
}
-bool SystemSpecification::readSpecFile(const string& fileName, int maxApiLevel) {
+bool SystemSpecification::readSpecFile(const string& fileName, unsigned int maxApiLevel) {
SpecFile* spec = new SpecFile(fileName);
if (!spec->readSpecFile(maxApiLevel)) {
cerr << fileName << ": Failed to parse.\n";
@@ -815,12 +831,16 @@
}
-static void updateMaxApiLevel(const VersionInfo& info, int* maxApiLevel) {
+static void updateMaxApiLevel(const VersionInfo& info, unsigned int* maxApiLevel) {
+ if (info.minVersion == VersionInfo::kUnreleasedVersion) {
+ // Ignore development API level in consideration of max API level.
+ return;
+ }
*maxApiLevel = max(*maxApiLevel, max(info.minVersion, info.maxVersion));
}
-int SystemSpecification::getMaximumApiLevel() {
- int maxApiLevel = 0;
+unsigned int SystemSpecification::getMaximumApiLevel() {
+ unsigned int maxApiLevel = 0;
for (auto i : mConstants) {
for (auto j: i.second->getSpecifications()) {
updateMaxApiLevel(j->getVersionInfo(), &maxApiLevel);
@@ -839,7 +859,7 @@
return maxApiLevel;
}
-bool SystemSpecification::generateFiles(bool forVerification, int maxApiLevel) const {
+bool SystemSpecification::generateFiles(bool forVerification, unsigned int maxApiLevel) const {
bool success = generateHeaderFiles("scriptc") &&
generateDocumentation("docs", forVerification) &&
generateTestFiles("test", maxApiLevel) &&
diff --git a/api/Specification.h b/api/Specification.h
index 87969a6..d3fbad5 100644
--- a/api/Specification.h
+++ b/api/Specification.h
@@ -19,6 +19,7 @@
// See Generator.cpp for documentation of the .spec file format.
+#include <climits>
#include <fstream>
#include <list>
#include <map>
@@ -124,8 +125,8 @@
* If non zero, both versions should be at least 9, the API level that introduced
* RenderScript.
*/
- int minVersion;
- int maxVersion;
+ unsigned int minVersion;
+ unsigned int maxVersion;
// Either 0, 32 or 64. If 0, this definition is valid for both 32 and 64 bits.
int intSize;
@@ -134,12 +135,14 @@
* we are interested in. This may alter maxVersion. This method returns false if the
* minVersion is greater than the maxApiLevel.
*/
- bool scan(Scanner* scanner, int maxApiLevel);
+ bool scan(Scanner* scanner, unsigned int maxApiLevel);
/* Return true if the target can be found whitin the range. */
- bool includesVersion(int target) const {
+ bool includesVersion(unsigned int target) const {
return (minVersion == 0 || target >= minVersion) &&
(maxVersion == 0 || target <= maxVersion);
}
+
+ static constexpr unsigned int kUnreleasedVersion = UINT_MAX;
};
// We have three type of definitions
@@ -266,7 +269,7 @@
std::string getValue() const { return mValue; }
// Parse a constant specification and add it to specFile.
- static void scanConstantSpecification(Scanner* scanner, SpecFile* specFile, int maxApiLevel);
+ static void scanConstantSpecification(Scanner* scanner, SpecFile* specFile, unsigned int maxApiLevel);
};
enum TypeKind {
@@ -313,7 +316,7 @@
const std::vector<std::string>& getValueComments() const { return mValueComments; }
// Parse a type specification and add it to specFile.
- static void scanTypeSpecification(Scanner* scanner, SpecFile* specFile, int maxApiLevel);
+ static void scanTypeSpecification(Scanner* scanner, SpecFile* specFile, unsigned int maxApiLevel);
};
// Maximum number of placeholders (like #1, #2) in function specifications.
@@ -343,6 +346,9 @@
* "": Don't test. This is the default.
*/
std::string mTest;
+ bool mInternal; // Internal. Not visible to users. (Default: false)
+ bool mIntrinsic; // Compiler intrinsic that is lowered to an internal API.
+ // (Default: false)
std::string mAttribute; // Function attributes.
std::string mPrecisionLimit; // Maximum precision required when checking output of this
// function.
@@ -379,10 +385,13 @@
void createPermutations(Function* function, Scanner* scanner);
public:
- FunctionSpecification(Function* function) : mFunction(function), mReturn(nullptr) {}
+ FunctionSpecification(Function* function) : mFunction(function), mInternal(false),
+ mIntrinsic(false), mReturn(nullptr) {}
~FunctionSpecification();
Function* getFunction() const { return mFunction; }
+ bool isInternal() const { return mInternal; }
+ bool isIntrinsic() const { return mIntrinsic; }
std::string getAttribute() const { return mAttribute; }
std::string getTest() const { return mTest; }
std::string getPrecisionLimit() const { return mPrecisionLimit; }
@@ -402,7 +411,7 @@
void parseTest(Scanner* scanner);
// Return true if we need to generate tests for this function.
- bool hasTests(int versionOfTestFiles) const;
+ bool hasTests(unsigned int versionOfTestFiles) const;
bool hasInline() const { return mInline.size() > 0; }
@@ -415,7 +424,7 @@
}
// Parse a function specification and add it to specFile.
- static void scanFunctionSpecification(Scanner* scanner, SpecFile* specFile, int maxApiLevel);
+ static void scanFunctionSpecification(Scanner* scanner, SpecFile* specFile, unsigned int maxApiLevel);
};
/* A concrete version of a function specification, where all placeholders have been replaced by
@@ -527,7 +536,7 @@
!mDocumentedFunctions.empty();
}
- bool readSpecFile(int maxApiLevel);
+ bool readSpecFile(unsigned int maxApiLevel);
/* These are called by the parser to keep track of the specifications defined in this file.
* hasDocumentation is true if this specification containes the documentation.
@@ -562,9 +571,9 @@
/* Parse the spec file and create the object hierarchy, adding a pointer to mSpecFiles.
* We won't include information passed the specified level.
*/
- bool readSpecFile(const std::string& fileName, int maxApiLevel);
+ bool readSpecFile(const std::string& fileName, unsigned int maxApiLevel);
// Generate all the files.
- bool generateFiles(bool forVerification, int maxApiLevel) const;
+ bool generateFiles(bool forVerification, unsigned int maxApiLevel) const;
const std::vector<SpecFile*>& getSpecFiles() const { return mSpecFiles; }
const std::map<std::string, Constant*>& getConstants() const { return mConstants; }
@@ -575,7 +584,7 @@
std::string getHtmlAnchor(const std::string& name) const;
// Returns the maximum API level specified in any spec file.
- int getMaximumApiLevel();
+ unsigned int getMaximumApiLevel();
};
// Singleton that represents the collection of all the specs we're processing.
diff --git a/api/Utilities.cpp b/api/Utilities.cpp
index 4268278..841d824 100644
--- a/api/Utilities.cpp
+++ b/api/Utilities.cpp
@@ -163,7 +163,7 @@
}
string makeAttributeTag(const string& userAttribute, const string& additionalAttribute,
- int deprecatedApiLevel, const string& deprecatedMessage) {
+ unsigned int deprecatedApiLevel, const string& deprecatedMessage) {
ostringstream stream;
bool needComma = false;
if (userAttribute[0] == '=') {
diff --git a/api/Utilities.h b/api/Utilities.h
index cd0db72..eced68d 100644
--- a/api/Utilities.h
+++ b/api/Utilities.h
@@ -51,7 +51,7 @@
* use the additionalAttribute. An empty string will be returned if there are no attributes.
*/
std::string makeAttributeTag(const std::string& userAttribute,
- const std::string& additionalAttribute, int deprecatedApiLevel,
+ const std::string& additionalAttribute, unsigned int deprecatedApiLevel,
const std::string& deprecatedMessage);
/* This class is used to generate one source file. There will be one instance
diff --git a/api/generate.sh b/api/generate.sh
index 3ff882f..55d3f04 100755
--- a/api/generate.sh
+++ b/api/generate.sh
@@ -50,7 +50,7 @@
do
mv slangtest/all$i.rs ../../compile/slang/tests/P_all_api_$i
done
-rmdir slangtest
+rm -rf slangtest
mv RSStubsWhiteList.cpp ../../compile/libbcc/lib/Renderscript/
diff --git a/api/rs_for_each.spec b/api/rs_for_each.spec
index c0c09b2..c9bb2e6 100644
--- a/api/rs_for_each.spec
+++ b/api/rs_for_each.spec
@@ -83,6 +83,15 @@
over cells 4, 5, 6, and 7 in the X dimension, set xStart to 4 and xEnd to 8.
end:
+type: rs_kernel
+version: UNRELEASED
+simple: void*
+summary: Handle to a kernel function
+description:
+ An opaque type for a function that is defined with the kernel attribute. A value
+ of this type can be used in a @rsForEach call to launch a kernel.
+end:
+
function: rsForEach
version: 9 13
ret: void
@@ -91,26 +100,34 @@
arg: rs_allocation output, "Allocation to write date into."
arg: const void* usrData, "User defined data to pass to the script. May be NULL."
arg: const rs_script_call_t* sc, "Extra control information used to select a sub-region of the allocation to be processed or suggest a walking strategy. May be NULL."
-summary: Invoke the root kernel of a script
+summary: Launches a kernel
description:
- Invoke the kernel named "root" of the specified script. Like other kernels, this root()
- function will be invoked repeatedly over the cells of the specificed allocation, filling
- the output allocation with the results.
+ Runs the kernel over zero or more input allocations. They are passed after the
+ @rs_kernel argument. If the specified kernel returns a value, an output allocation
+ must be specified as the last argument. All input allocations,
+ and the output allocation if it exists, must have the same dimensions.
- When rsForEach is called, the root script is launched immediately. rsForEach returns
- only when the script has completed and the output allocation is ready to use.
+ This is a synchronous function. A call to this function only returns after all
+ the work has completed for all cells of the input allocations. If the kernel
+ function returns any value, the call waits until all results have been written
+ to the output allocation.
- The rs_script argument is typically initialized using a global variable set from Java.
+ Up to API level 23, the kernel is implicitly specified as the kernel named
+ "root" in the specified script, and only a single input allocation can be used.
+ Starting in API level *UNRELEASED*, an arbitrary kernel function can be used,
+ as specified by the kernel argument. The script argument is removed.
+ The kernel must be defined in the current script. In addition, more than one
+ input can be used.
- The kernel can be invoked with just an input allocation or just an output allocation.
- This can be done by defining an rs_allocation variable and not initializing it. E.g.<code><br/>
- rs_script gCustomScript;<br/>
- void specializedProcessing(rs_allocation in) {<br/>
- rs_allocation ignoredOut;<br/>
- rsForEach(gCustomScript, in, ignoredOut);<br/>
- }<br/></code>
-
- If both input and output allocations are specified, they must have the same dimensions.
+E.g.<code><br/>
+ float __attribute__((kernel)) square(float a) {<br/>
+ return a * a;<br/>
+ }<br/>
+<br/>
+ void compute(rs_allocation ain, rs_allocation aout) {<br/>
+ rsForEach(square, ain, aout);<br/>
+ }<br/>
+<br/></code>
test: none
end:
@@ -148,7 +165,7 @@
end:
function: rsForEach
-version: 14
+version: 14 23
ret: void
arg: rs_script script
arg: rs_allocation input
@@ -156,6 +173,59 @@
test: none
end:
+function: rsForEach
+version: UNRELEASED
+intrinsic: true
+attrib: = # Not overloadable
+ret: void
+arg: rs_kernel kernel, "Function designator to a function that is defined with the kernel attribute."
+arg: ..., "Input and output allocations"
+test: none
+end:
+
+function: rsForEachWithOptions
+version: UNRELEASED
+intrinsic: true
+attrib: = # Not overloadable
+ret: void
+arg: rs_kernel kernel, "Function designator to a function that is defined with the kernel attribute."
+arg: rs_script_call_t* options, "Launch options"
+arg: ..., "Input and output allocations"
+summary: Launches a kernel with options
+description:
+ Launches kernel in a way similar to @rsForEach. However, instead of processing
+ all cells in the input, this function only processes cells in the subspace of
+ the index space specified in options. With the index space explicitly specified
+ by options, no input or output allocation is required for a kernel launch using
+ this API. If allocations are passed in, they must match the number of arguments
+ and return value expected by the kernel function. The output allocation is
+ present if and only if the kernel has a non-void return value.
+
+ E.g., <code><br/>
+ rs_script_call_t opts = {0};<br/>
+ opts.xStart = 0;<br/>
+ opts.xEnd = dimX;<br/>
+ opts.yStart = 0;<br/>
+ opts.yEnd = dimY / 2;<br/>
+ rsForEachWithOptions(foo, &opts, out, out);<br/>
+</code>
+
+test: none
+end:
+
+function: rsForEachInternal
+version: UNRELEASED
+internal: true
+ret: void
+arg: int slot
+arg: rs_script_call_t* options
+arg: rs_allocation input
+arg: rs_allocation output
+summary: (Internal API) Launch a kernel in the current Script (with the slot number)
+description:
+test: none
+end:
+
function: rsGetArray0
version: 23
ret: uint32_t
diff --git a/cpp/rsDispatch.cpp b/cpp/rsDispatch.cpp
index fd09c87..ce147d9 100644
--- a/cpp/rsDispatch.cpp
+++ b/cpp/rsDispatch.cpp
@@ -20,7 +20,7 @@
#include <dlfcn.h>
#include <limits.h>
-#define LOG_API(...)
+#define LOG_ERR(...) __android_log_print(ANDROID_LOG_ERROR, "RS Dispatch", __VA_ARGS__)  // No trailing ';': call sites supply it, so if/else use stays safe.
#define REDUCE_API_LEVEL INT_MAX
bool loadSymbols(void* handle, dispatchTable& dispatchTab, int device_api) {
@@ -28,340 +28,340 @@
// Function to set the native lib path for 64bit compat lib.
dispatchTab.SetNativeLibDir = (SetNativeLibDirFnPtr)dlsym(handle, "rsaContextSetNativeLibDir");
if (dispatchTab.SetNativeLibDir == NULL) {
- LOG_API("Couldn't initialize dispatchTab.SetNativeLibDir");
+ LOG_ERR("Couldn't initialize dispatchTab.SetNativeLibDir");
return false;
}
#endif
dispatchTab.AllocationGetType = (AllocationGetTypeFnPtr)dlsym(handle, "rsaAllocationGetType");
if (dispatchTab.AllocationGetType == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationGetType");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationGetType");
return false;
}
dispatchTab.TypeGetNativeData = (TypeGetNativeDataFnPtr)dlsym(handle, "rsaTypeGetNativeData");
if (dispatchTab.TypeGetNativeData == NULL) {
- LOG_API("Couldn't initialize dispatchTab.TypeGetNativeData");
+ LOG_ERR("Couldn't initialize dispatchTab.TypeGetNativeData");
return false;
}
dispatchTab.ElementGetNativeData = (ElementGetNativeDataFnPtr)dlsym(handle, "rsaElementGetNativeData");
if (dispatchTab.ElementGetNativeData == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ElementGetNativeData");
+ LOG_ERR("Couldn't initialize dispatchTab.ElementGetNativeData");
return false;
}
dispatchTab.ElementGetSubElements = (ElementGetSubElementsFnPtr)dlsym(handle, "rsaElementGetSubElements");
if (dispatchTab.ElementGetSubElements == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ElementGetSubElements");
+ LOG_ERR("Couldn't initialize dispatchTab.ElementGetSubElements");
return false;
}
dispatchTab.DeviceCreate = (DeviceCreateFnPtr)dlsym(handle, "rsDeviceCreate");
if (dispatchTab.DeviceCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.DeviceCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.DeviceCreate");
return false;
}
dispatchTab.DeviceDestroy = (DeviceDestroyFnPtr)dlsym(handle, "rsDeviceDestroy");
if (dispatchTab.DeviceDestroy == NULL) {
- LOG_API("Couldn't initialize dispatchTab.DeviceDestroy");
+ LOG_ERR("Couldn't initialize dispatchTab.DeviceDestroy");
return false;
}
dispatchTab.DeviceSetConfig = (DeviceSetConfigFnPtr)dlsym(handle, "rsDeviceSetConfig");
if (dispatchTab.DeviceSetConfig == NULL) {
- LOG_API("Couldn't initialize dispatchTab.DeviceSetConfig");
+ LOG_ERR("Couldn't initialize dispatchTab.DeviceSetConfig");
return false;
}
dispatchTab.ContextCreate = (ContextCreateFnPtr)dlsym(handle, "rsContextCreate");;
if (dispatchTab.ContextCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ContextCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.ContextCreate");
return false;
}
dispatchTab.GetName = (GetNameFnPtr)dlsym(handle, "rsaGetName");;
if (dispatchTab.GetName == NULL) {
- LOG_API("Couldn't initialize dispatchTab.GetName");
+ LOG_ERR("Couldn't initialize dispatchTab.GetName");
return false;
}
dispatchTab.ContextDestroy = (ContextDestroyFnPtr)dlsym(handle, "rsContextDestroy");
if (dispatchTab.ContextDestroy == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ContextDestroy");
+ LOG_ERR("Couldn't initialize dispatchTab.ContextDestroy");
return false;
}
dispatchTab.ContextGetMessage = (ContextGetMessageFnPtr)dlsym(handle, "rsContextGetMessage");
if (dispatchTab.ContextGetMessage == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ContextGetMessage");
+ LOG_ERR("Couldn't initialize dispatchTab.ContextGetMessage");
return false;
}
dispatchTab.ContextPeekMessage = (ContextPeekMessageFnPtr)dlsym(handle, "rsContextPeekMessage");
if (dispatchTab.ContextPeekMessage == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ContextPeekMessage");
+ LOG_ERR("Couldn't initialize dispatchTab.ContextPeekMessage");
return false;
}
dispatchTab.ContextSendMessage = (ContextSendMessageFnPtr)dlsym(handle, "rsContextSendMessage");
if (dispatchTab.ContextSendMessage == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ContextSendMessage");
+ LOG_ERR("Couldn't initialize dispatchTab.ContextSendMessage");
return false;
}
dispatchTab.ContextInitToClient = (ContextInitToClientFnPtr)dlsym(handle, "rsContextInitToClient");
if (dispatchTab.ContextInitToClient == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ContextInitToClient");
+ LOG_ERR("Couldn't initialize dispatchTab.ContextInitToClient");
return false;
}
dispatchTab.ContextDeinitToClient = (ContextDeinitToClientFnPtr)dlsym(handle, "rsContextDeinitToClient");
if (dispatchTab.ContextDeinitToClient == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ContextDeinitToClient");
+ LOG_ERR("Couldn't initialize dispatchTab.ContextDeinitToClient");
return false;
}
dispatchTab.TypeCreate = (TypeCreateFnPtr)dlsym(handle, "rsTypeCreate");
if (dispatchTab.TypeCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.TypeCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.TypeCreate");
return false;
}
dispatchTab.AllocationCreateTyped = (AllocationCreateTypedFnPtr)dlsym(handle, "rsAllocationCreateTyped");
if (dispatchTab.AllocationCreateTyped == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationCreateTyped");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationCreateTyped");
return false;
}
dispatchTab.AllocationCreateFromBitmap = (AllocationCreateFromBitmapFnPtr)dlsym(handle, "rsAllocationCreateFromBitmap");
if (dispatchTab.AllocationCreateFromBitmap == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationCreateFromBitmap");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationCreateFromBitmap");
return false;
}
dispatchTab.AllocationCubeCreateFromBitmap = (AllocationCubeCreateFromBitmapFnPtr)dlsym(handle, "rsAllocationCubeCreateFromBitmap");
if (dispatchTab.AllocationCubeCreateFromBitmap == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationCubeCreateFromBitmap");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationCubeCreateFromBitmap");
return false;
}
dispatchTab.AllocationGetSurface = (AllocationGetSurfaceFnPtr)dlsym(handle, "rsAllocationGetSurface");
if (dispatchTab.AllocationGetSurface == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationGetSurface");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationGetSurface");
return false;
}
dispatchTab.AllocationSetSurface = (AllocationSetSurfaceFnPtr)dlsym(handle, "rsAllocationSetSurface");
if (dispatchTab.AllocationSetSurface == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationSetSurface");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationSetSurface");
return false;
}
dispatchTab.ContextFinish = (ContextFinishFnPtr)dlsym(handle, "rsContextFinish");
if (dispatchTab.ContextFinish == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ContextFinish");
+ LOG_ERR("Couldn't initialize dispatchTab.ContextFinish");
return false;
}
dispatchTab.ContextDump = (ContextDumpFnPtr)dlsym(handle, "rsContextDump");
if (dispatchTab.ContextDump == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ContextDump");
+ LOG_ERR("Couldn't initialize dispatchTab.ContextDump");
return false;
}
dispatchTab.ContextSetPriority = (ContextSetPriorityFnPtr)dlsym(handle, "rsContextSetPriority");
if (dispatchTab.ContextSetPriority == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ContextSetPriority");
+ LOG_ERR("Couldn't initialize dispatchTab.ContextSetPriority");
return false;
}
dispatchTab.AssignName = (AssignNameFnPtr)dlsym(handle, "rsAssignName");
if (dispatchTab.AssignName == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AssignName");
+ LOG_ERR("Couldn't initialize dispatchTab.AssignName");
return false;
}
dispatchTab.ObjDestroy = (ObjDestroyFnPtr)dlsym(handle, "rsObjDestroy");
if (dispatchTab.ObjDestroy == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ObjDestroy");
+ LOG_ERR("Couldn't initialize dispatchTab.ObjDestroy");
return false;
}
dispatchTab.ElementCreate = (ElementCreateFnPtr)dlsym(handle, "rsElementCreate");
if (dispatchTab.ElementCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ElementCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.ElementCreate");
return false;
}
dispatchTab.ElementCreate2 = (ElementCreate2FnPtr)dlsym(handle, "rsElementCreate2");
if (dispatchTab.ElementCreate2 == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ElementCreate2");
+ LOG_ERR("Couldn't initialize dispatchTab.ElementCreate2");
return false;
}
dispatchTab.AllocationCopyToBitmap = (AllocationCopyToBitmapFnPtr)dlsym(handle, "rsAllocationCopyToBitmap");
if (dispatchTab.AllocationCopyToBitmap == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationCopyToBitmap");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationCopyToBitmap");
return false;
}
dispatchTab.Allocation1DData = (Allocation1DDataFnPtr)dlsym(handle, "rsAllocation1DData");
if (dispatchTab.Allocation1DData == NULL) {
- LOG_API("Couldn't initialize dispatchTab.Allocation1DData");
+ LOG_ERR("Couldn't initialize dispatchTab.Allocation1DData");
return false;
}
dispatchTab.Allocation1DElementData = (Allocation1DElementDataFnPtr)dlsym(handle, "rsAllocation1DElementData");
if (dispatchTab.Allocation1DElementData == NULL) {
- LOG_API("Couldn't initialize dispatchTab.Allocation1DElementData");
+ LOG_ERR("Couldn't initialize dispatchTab.Allocation1DElementData");
return false;
}
dispatchTab.Allocation2DData = (Allocation2DDataFnPtr)dlsym(handle, "rsAllocation2DData");
if (dispatchTab.Allocation2DData == NULL) {
- LOG_API("Couldn't initialize dispatchTab.Allocation2DData");
+ LOG_ERR("Couldn't initialize dispatchTab.Allocation2DData");
return false;
}
dispatchTab.Allocation3DData = (Allocation3DDataFnPtr)dlsym(handle, "rsAllocation3DData");
if (dispatchTab.Allocation3DData == NULL) {
- LOG_API("Couldn't initialize dispatchTab.Allocation3DData");
+ LOG_ERR("Couldn't initialize dispatchTab.Allocation3DData");
return false;
}
dispatchTab.AllocationGenerateMipmaps = (AllocationGenerateMipmapsFnPtr)dlsym(handle, "rsAllocationGenerateMipmaps");
if (dispatchTab.AllocationGenerateMipmaps == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationGenerateMipmaps");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationGenerateMipmaps");
return false;
}
dispatchTab.AllocationRead = (AllocationReadFnPtr)dlsym(handle, "rsAllocationRead");
if (dispatchTab.AllocationRead == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationRead");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationRead");
return false;
}
dispatchTab.Allocation1DRead = (Allocation1DReadFnPtr)dlsym(handle, "rsAllocation1DRead");
if (dispatchTab.Allocation1DRead == NULL) {
- LOG_API("Couldn't initialize dispatchTab.Allocation1DRead");
+ LOG_ERR("Couldn't initialize dispatchTab.Allocation1DRead");
return false;
}
dispatchTab.Allocation2DRead = (Allocation2DReadFnPtr)dlsym(handle, "rsAllocation2DRead");
if (dispatchTab.Allocation2DRead == NULL) {
- LOG_API("Couldn't initialize dispatchTab.Allocation2DRead");
+ LOG_ERR("Couldn't initialize dispatchTab.Allocation2DRead");
return false;
}
dispatchTab.AllocationSyncAll = (AllocationSyncAllFnPtr)dlsym(handle, "rsAllocationSyncAll");
if (dispatchTab.AllocationSyncAll == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationSyncAll");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationSyncAll");
return false;
}
dispatchTab.AllocationResize1D = (AllocationResize1DFnPtr)dlsym(handle, "rsAllocationResize1D");
if (dispatchTab.AllocationResize1D == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationResize1D");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationResize1D");
return false;
}
dispatchTab.AllocationCopy2DRange = (AllocationCopy2DRangeFnPtr)dlsym(handle, "rsAllocationCopy2DRange");
if (dispatchTab.AllocationCopy2DRange == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationCopy2DRange");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationCopy2DRange");
return false;
}
dispatchTab.AllocationCopy3DRange = (AllocationCopy3DRangeFnPtr)dlsym(handle, "rsAllocationCopy3DRange");
if (dispatchTab.AllocationCopy3DRange == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationCopy3DRange");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationCopy3DRange");
return false;
}
dispatchTab.SamplerCreate = (SamplerCreateFnPtr)dlsym(handle, "rsSamplerCreate");
if (dispatchTab.SamplerCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.SamplerCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.SamplerCreate");
return false;
}
dispatchTab.ScriptBindAllocation = (ScriptBindAllocationFnPtr)dlsym(handle, "rsScriptBindAllocation");
if (dispatchTab.ScriptBindAllocation == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptBindAllocation");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptBindAllocation");
return false;
}
dispatchTab.ScriptSetTimeZone = (ScriptSetTimeZoneFnPtr)dlsym(handle, "rsScriptSetTimeZone");
if (dispatchTab.ScriptSetTimeZone == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptSetTimeZone");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptSetTimeZone");
return false;
}
dispatchTab.ScriptInvoke = (ScriptInvokeFnPtr)dlsym(handle, "rsScriptInvoke");
if (dispatchTab.ScriptInvoke == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptInvoke");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptInvoke");
return false;
}
dispatchTab.ScriptInvokeV = (ScriptInvokeVFnPtr)dlsym(handle, "rsScriptInvokeV");
if (dispatchTab.ScriptInvokeV == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptInvokeV");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptInvokeV");
return false;
}
dispatchTab.ScriptForEach = (ScriptForEachFnPtr)dlsym(handle, "rsScriptForEach");
if (dispatchTab.ScriptForEach == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptForEach");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptForEach");
return false;
}
dispatchTab.ScriptSetVarI = (ScriptSetVarIFnPtr)dlsym(handle, "rsScriptSetVarI");
if (dispatchTab.ScriptSetVarI == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptSetVarI");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptSetVarI");
return false;
}
dispatchTab.ScriptSetVarObj = (ScriptSetVarObjFnPtr)dlsym(handle, "rsScriptSetVarObj");
if (dispatchTab.ScriptSetVarObj == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptSetVarObj");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptSetVarObj");
return false;
}
dispatchTab.ScriptSetVarJ = (ScriptSetVarJFnPtr)dlsym(handle, "rsScriptSetVarJ");
if (dispatchTab.ScriptSetVarJ == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptSetVarJ");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptSetVarJ");
return false;
}
dispatchTab.ScriptSetVarF = (ScriptSetVarFFnPtr)dlsym(handle, "rsScriptSetVarF");
if (dispatchTab.ScriptSetVarF == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptSetVarF");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptSetVarF");
return false;
}
dispatchTab.ScriptSetVarD = (ScriptSetVarDFnPtr)dlsym(handle, "rsScriptSetVarD");
if (dispatchTab.ScriptSetVarD == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptSetVarD");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptSetVarD");
return false;
}
dispatchTab.ScriptSetVarV = (ScriptSetVarVFnPtr)dlsym(handle, "rsScriptSetVarV");
if (dispatchTab.ScriptSetVarV == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptSetVarV");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptSetVarV");
return false;
}
dispatchTab.ScriptGetVarV = (ScriptGetVarVFnPtr)dlsym(handle, "rsScriptGetVarV");
if (dispatchTab.ScriptGetVarV == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptGetVarV");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptGetVarV");
return false;
}
dispatchTab.ScriptSetVarVE = (ScriptSetVarVEFnPtr)dlsym(handle, "rsScriptSetVarVE");
if (dispatchTab.ScriptSetVarVE == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptSetVarVE");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptSetVarVE");
return false;
}
dispatchTab.ScriptCCreate = (ScriptCCreateFnPtr)dlsym(handle, "rsScriptCCreate");
if (dispatchTab.ScriptCCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptCCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptCCreate");
return false;
}
dispatchTab.ScriptIntrinsicCreate = (ScriptIntrinsicCreateFnPtr)dlsym(handle, "rsScriptIntrinsicCreate");
if (dispatchTab.ScriptIntrinsicCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptIntrinsicCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptIntrinsicCreate");
return false;
}
dispatchTab.ScriptKernelIDCreate = (ScriptKernelIDCreateFnPtr)dlsym(handle, "rsScriptKernelIDCreate");
if (dispatchTab.ScriptKernelIDCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptKernelIDCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptKernelIDCreate");
return false;
}
dispatchTab.ScriptFieldIDCreate = (ScriptFieldIDCreateFnPtr)dlsym(handle, "rsScriptFieldIDCreate");
if (dispatchTab.ScriptFieldIDCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptFieldIDCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptFieldIDCreate");
return false;
}
dispatchTab.ScriptGroupCreate = (ScriptGroupCreateFnPtr)dlsym(handle, "rsScriptGroupCreate");
if (dispatchTab.ScriptGroupCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptGroupCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptGroupCreate");
return false;
}
dispatchTab.ScriptGroupSetOutput = (ScriptGroupSetOutputFnPtr)dlsym(handle, "rsScriptGroupSetOutput");
if (dispatchTab.ScriptGroupSetOutput == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptGroupSetOutput");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptGroupSetOutput");
return false;
}
dispatchTab.ScriptGroupSetInput = (ScriptGroupSetInputFnPtr)dlsym(handle, "rsScriptGroupSetInput");
if (dispatchTab.ScriptGroupSetInput == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptGroupSetInput");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptGroupSetInput");
return false;
}
dispatchTab.ScriptGroupExecute = (ScriptGroupExecuteFnPtr)dlsym(handle, "rsScriptGroupExecute");
if (dispatchTab.ScriptGroupExecute == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptGroupExecute");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptGroupExecute");
return false;
}
dispatchTab.AllocationIoSend = (AllocationIoSendFnPtr)dlsym(handle, "rsAllocationIoSend");
if (dispatchTab.AllocationIoSend == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationIoSend");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationIoSend");
return false;
}
dispatchTab.AllocationIoReceive = (AllocationIoReceiveFnPtr)dlsym(handle, "rsAllocationIoReceive");
if (dispatchTab.AllocationIoReceive == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationIoReceive");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationIoReceive");
return false;
}
// API_21 functions
if (device_api >= 21) {
dispatchTab.AllocationGetPointer = (AllocationGetPointerFnPtr)dlsym(handle, "rsAllocationGetPointer");
if (dispatchTab.AllocationGetPointer == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationGetPointer");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationGetPointer");
return false;
}
}
@@ -370,52 +370,52 @@
// ScriptGroup V2 functions
dispatchTab.ScriptInvokeIDCreate = (ScriptInvokeIDCreateFnPtr)dlsym(handle, "rsScriptInvokeIDCreate");
if (dispatchTab.ScriptInvokeIDCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptInvokeIDCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptInvokeIDCreate");
return false;
}
dispatchTab.ClosureCreate = (ClosureCreateFnPtr)dlsym(handle, "rsClosureCreate");
if (dispatchTab.ClosureCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ClosureCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.ClosureCreate");
return false;
}
dispatchTab.InvokeClosureCreate = (InvokeClosureCreateFnPtr)dlsym(handle, "rsInvokeClosureCreate");
if (dispatchTab.InvokeClosureCreate == NULL) {
- LOG_API("Couldn't initialize dispatchTab.InvokeClosureCreate");
+ LOG_ERR("Couldn't initialize dispatchTab.InvokeClosureCreate");
return false;
}
dispatchTab.ClosureSetArg = (ClosureSetArgFnPtr)dlsym(handle, "rsClosureSetArg");
if (dispatchTab.ClosureSetArg == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ClosureSetArg");
+ LOG_ERR("Couldn't initialize dispatchTab.ClosureSetArg");
return false;
}
dispatchTab.ClosureSetGlobal = (ClosureSetGlobalFnPtr)dlsym(handle, "rsClosureSetGlobal");
if (dispatchTab.ClosureSetGlobal == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ClosureSetGlobal");
+ LOG_ERR("Couldn't initialize dispatchTab.ClosureSetGlobal");
return false;
}
dispatchTab.ScriptGroup2Create = (ScriptGroup2CreateFnPtr)dlsym(handle, "rsScriptGroup2Create");
if (dispatchTab.ScriptGroup2Create == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptGroup2Create");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptGroup2Create");
return false;
}
dispatchTab.AllocationElementData = (AllocationElementDataFnPtr)dlsym(handle, "rsAllocationElementData");
if (dispatchTab.AllocationElementData == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationElementData");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationElementData");
return false;
}
dispatchTab.AllocationElementRead = (AllocationElementReadFnPtr)dlsym(handle, "rsAllocationElementRead");
if (dispatchTab.AllocationElementRead == NULL) {
- LOG_API("Couldn't initialize dispatchTab.AllocationElementRead");
+ LOG_ERR("Couldn't initialize dispatchTab.AllocationElementRead");
return false;
}
dispatchTab.Allocation3DRead = (Allocation3DReadFnPtr)dlsym(handle, "rsAllocation3DRead");
if (dispatchTab.Allocation3DRead == NULL) {
- LOG_API("Couldn't initialize dispatchTab.Allocation3DRead");
+ LOG_ERR("Couldn't initialize dispatchTab.Allocation3DRead");
return false;
}
dispatchTab.ScriptForEachMulti = (ScriptForEachMultiFnPtr)dlsym(handle, "rsScriptForEachMulti");
if (dispatchTab.ScriptForEachMulti == NULL) {
- LOG_API("Couldn't initialize dispatchTab.ScriptForEachMulti");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptForEachMulti");
return false;
}
}
@@ -423,7 +423,7 @@
if (device_api >= REDUCE_API_LEVEL) {
dispatchTab.ScriptReduce = (ScriptReduceFnPtr)dlsym(handle, "rsScriptReduce");
if (dispatchTab.ScriptReduce == nullptr) {
- LOG_API("Couldn't initialize dispatchTab.ScriptReduce");
+ LOG_ERR("Couldn't initialize dispatchTab.ScriptReduce");
return false;
}
}
@@ -436,7 +436,7 @@
bool loadIOSuppSyms(void* handleIO, ioSuppDT& ioDispatch){
ioDispatch.sAllocationSetSurface = (sAllocationSetSurfaceFnPtr)dlsym(handleIO, "AllocationSetSurface");
if (ioDispatch.sAllocationSetSurface == NULL) {
- LOG_API("Couldn't initialize ioDispatch.sAllocationSetSurface");
+ LOG_ERR("Couldn't initialize ioDispatch.sAllocationSetSurface");
return false;
}
return true;
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk
index c2f565c..c816c7d 100644
--- a/cpu_ref/Android.mk
+++ b/cpu_ref/Android.mk
@@ -53,9 +53,6 @@
rsCpuIntrinsics_advsimd_YuvToRGB.S
# rsCpuIntrinsics_advsimd_Blend.S \
-# Clang does not support nested .irp in *_Blur.S
-LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as
-
ifeq ($(ARCH_ARM_HAVE_NEON),true)
LOCAL_CFLAGS_arm += -DARCH_ARM_HAVE_NEON
endif
@@ -72,8 +69,6 @@
rsCpuIntrinsics_neon_YuvToRGB.S \
LOCAL_ASFLAGS_arm := -mfpu=neon
- # Clang does not support nested .irp in *_Blur.S
- LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
endif
ifeq ($(ARCH_X86_HAVE_SSSE3),true)
diff --git a/cpu_ref/rsCpuIntrinsics_advsimd_Blur.S b/cpu_ref/rsCpuIntrinsics_advsimd_Blur.S
index 7ea80a0..f73290f 100644
--- a/cpu_ref/rsCpuIntrinsics_advsimd_Blur.S
+++ b/cpu_ref/rsCpuIntrinsics_advsimd_Blur.S
@@ -18,6 +18,8 @@
#define PRIVATE(f) .text; .align 4; .type f,#function; f:
#define END(f) .size f, .-f;
+//#define ARCH_ARM64_USE_BLUR_PRELOAD
+
.set FRACTION_BITS, 7
.set MAX_R, 25
@@ -32,6 +34,15 @@
.endif
.endm
+/* It's not always clear that prefetching is beneficial and this needs further
+ * testing on different cores, so it's made switchable here.
+ */
+#if defined(ARCH_ARM64_USE_BLUR_PRELOAD)
+#define VERTPLD(...) prfm PLDL1KEEP, [__VA_ARGS__]
+#else
+#define VERTPLD(...) nop
+#endif
+
/* Fetch 16 columns of bytes (regardless of image format), convolve these
* vertically, and leave them in the register file. If working near the top or
* bottom of an image then clamp the addressing while loading the data in.
@@ -71,7 +82,7 @@
mov x10, x15
uxtl v14.8h, v15.8b
-// prfm PLDL1KEEP,[x1, #16] // TODO: confirm
+ VERTPLD(x1, #16)
uxtl2 v15.8h, v15.16b
.if \max_r < 16 // approximate
ifcc adr \reg, 1f
@@ -89,40 +100,128 @@
umull2 v15.4s, v15.8h, v0.h[0]
br \reg
- .irp rowclamp, 1, 0
- .set cc, \rowclamp
- .align 4
- // clang does not support nested .irp
- .irp dreg, 4, 3, 2, 1, 0 ; .irp lane, 7, 6, 5, 4, 3, 2, 1, 0 ; .irp doth, .h
- .set i, \dreg * 8 + \lane
- .if 0 < i && i <= \max_r
+ /* This version of the vertical fetch loop body is used away from the edges
+ * of the source image. The pointers start at the top and bottom source rows
+ * and work their way towards the centre on each iteration. This way the
+ * number of taps used can be controlled by jumping directly into the middle
+ * of the loop and running to completion.
+ * If the loop body changes size then the code which calculates the address of
+ * the initial iteration must be updated accordingly.
+ */
+ .macro vertfetch_noclamp i, dreg
+ .if 0 < \i && \i <= \max_r
ld1 {v10.16b}, [x10], x2
- ifcc cmp x6, #i
ld1 {v11.16b}, [x11], x13
- ifcc csel x10, x15, x10, lo
uaddl v16.8h, v10.8b, v11.8b
- ifcc cmp x7, #i
uaddl2 v11.8h, v10.16b, v11.16b
- ifcc csel x11, x19, x11, lo
- umlal v12.4s, v16.4h, v\dreg\doth[\lane]
- umlal2 v13.4s, v16.8h, v\dreg\doth[\lane]
-// prfm PLDL1KEEP,[x10, #32] // TODO: confirm
-nop
- umlal v14.4s, v11.4h, v\dreg\doth[\lane]
-// prfm PLDL1KEEP,[x11, #32] // TODO: confirm
-nop
- umlal2 v15.4s, v11.8h, v\dreg\doth[\lane]
- .endif
- .endr ; .endr ; .endr
- .if \rowclamp == 1
- 1: \labelc :
- b 2f
- .else
- 2: \labelnc :
+ umlal v12.4s, v16.4h, \dreg
+ umlal2 v13.4s, v16.8h, \dreg
+ VERTPLD(x10, #32)
+ umlal v14.4s, v11.4h, \dreg
+ VERTPLD(x11, #32)
+ umlal2 v15.4s, v11.8h, \dreg
.endif
- .endr
+ .endm
- uqrshrn v10.4h, v12.4s, #16 - FRACTION_BITS
+ /* This version of the vertical fetch loop body is used near the edges of the
+ * source image, where one or both of the accesses may start with a clamped
+ * value, and the row addresses only begin to change after some number of
+ * iterations before the end.
+ * If the loop body changes size then the code which calculates the address of
+ * the initial iteration must be updated accordingly.
+ */
+ .macro vertfetch_clamped i, dreg
+ .if 0 < \i && \i <= \max_r
+ ld1 {v10.16b}, [x10], x2
+ cmp x6, #\i
+ ld1 {v11.16b}, [x11], x13
+ csel x10, x15, x10, lo
+ uaddl v16.8h, v10.8b, v11.8b
+ cmp x7, #\i
+ uaddl2 v11.8h, v10.16b, v11.16b
+ csel x11, x19, x11, lo
+ umlal v12.4s, v16.4h, \dreg
+ umlal2 v13.4s, v16.8h, \dreg
+ VERTPLD(x10, #32)
+ umlal v14.4s, v11.4h, \dreg
+ VERTPLD(x11, #32)
+ umlal2 v15.4s, v11.8h, \dreg
+ .endif
+ .endm
+
+ /* Entry into this unrolled loop is computed as a negative index from
+ * \labelc at the end of the block.
+ */
+ .align 4
+ vertfetch_clamped 27, v3.h[3]
+ vertfetch_clamped 26, v3.h[2]
+ vertfetch_clamped 25, v3.h[1]
+ vertfetch_clamped 24, v3.h[0]
+ vertfetch_clamped 23, v2.h[7]
+ vertfetch_clamped 22, v2.h[6]
+ vertfetch_clamped 21, v2.h[5]
+ vertfetch_clamped 20, v2.h[4]
+ vertfetch_clamped 19, v2.h[3]
+ vertfetch_clamped 18, v2.h[2]
+ vertfetch_clamped 17, v2.h[1]
+ vertfetch_clamped 16, v2.h[0]
+ vertfetch_clamped 15, v1.h[7]
+ vertfetch_clamped 14, v1.h[6]
+ vertfetch_clamped 13, v1.h[5]
+ vertfetch_clamped 12, v1.h[4]
+ vertfetch_clamped 11, v1.h[3]
+ vertfetch_clamped 10, v1.h[2]
+ vertfetch_clamped 9, v1.h[1]
+ vertfetch_clamped 8, v1.h[0]
+ vertfetch_clamped 7, v0.h[7]
+ vertfetch_clamped 6, v0.h[6]
+ vertfetch_clamped 5, v0.h[5]
+ vertfetch_clamped 4, v0.h[4]
+ vertfetch_clamped 3, v0.h[3]
+ vertfetch_clamped 2, v0.h[2]
+ vertfetch_clamped 1, v0.h[1]
+ vertfetch_clamped 0, v0.h[0]
+ 1:
+ \labelc : b 2f /* done with clamped loop, skip over non-clamped loop */
+
+ /* Entry into this unrolled loop is computed as a negative index from
+ * \labelnc at the end of the block.
+ */
+ .align 4
+ vertfetch_noclamp 27, v3.h[3]
+ vertfetch_noclamp 26, v3.h[2]
+ vertfetch_noclamp 25, v3.h[1]
+ vertfetch_noclamp 24, v3.h[0]
+ vertfetch_noclamp 23, v2.h[7]
+ vertfetch_noclamp 22, v2.h[6]
+ vertfetch_noclamp 21, v2.h[5]
+ vertfetch_noclamp 20, v2.h[4]
+ vertfetch_noclamp 19, v2.h[3]
+ vertfetch_noclamp 18, v2.h[2]
+ vertfetch_noclamp 17, v2.h[1]
+ vertfetch_noclamp 16, v2.h[0]
+ vertfetch_noclamp 15, v1.h[7]
+ vertfetch_noclamp 14, v1.h[6]
+ vertfetch_noclamp 13, v1.h[5]
+ vertfetch_noclamp 12, v1.h[4]
+ vertfetch_noclamp 11, v1.h[3]
+ vertfetch_noclamp 10, v1.h[2]
+ vertfetch_noclamp 9, v1.h[1]
+ vertfetch_noclamp 8, v1.h[0]
+ vertfetch_noclamp 7, v0.h[7]
+ vertfetch_noclamp 6, v0.h[6]
+ vertfetch_noclamp 5, v0.h[5]
+ vertfetch_noclamp 4, v0.h[4]
+ vertfetch_noclamp 3, v0.h[3]
+ vertfetch_noclamp 2, v0.h[2]
+ vertfetch_noclamp 1, v0.h[1]
+ vertfetch_noclamp 0, v0.h[0]
+ \labelnc :
+
+ .purgem vertfetch_clamped
+ .purgem vertfetch_noclamp
+
+ 2: uqrshrn v10.4h, v12.4s, #16 - FRACTION_BITS
add x15, x15, #16
uqrshrn2 v10.8h, v13.4s, #16 - FRACTION_BITS
add x19, x19, #16
diff --git a/cpu_ref/rsCpuIntrinsics_neon_Blur.S b/cpu_ref/rsCpuIntrinsics_neon_Blur.S
index 4ab1340..a6479cb 100644
--- a/cpu_ref/rsCpuIntrinsics_neon_Blur.S
+++ b/cpu_ref/rsCpuIntrinsics_neon_Blur.S
@@ -18,6 +18,8 @@
#define PRIVATE(f) .text; .align 4; .type f,#function; f: .fnstart
#define END(f) .fnend; .size f, .-f;
+#define ARCH_ARM_USE_BLUR_PRELOAD
+
.eabi_attribute 25,1 @Tag_ABI_align8_preserved
.arm
@@ -40,6 +42,15 @@
.endif
.endm
+/* It's not always clear that prefetching is beneficial and this needs further
+ * testing on different cores, so it's made switchable here.
+ */
+#if defined(ARCH_ARM_USE_BLUR_PRELOAD)
+#define VERTPLD(...) pld [__VA_ARGS__]
+#else
+#define VERTPLD(...) nop
+#endif
+
/* Fetch 16 columns of bytes (regardless of image format), convolve these
* vertically, and leave them in the register file. If working near the top or
* bottom of an image then clamp the addressing while loading the data in.
@@ -76,7 +87,7 @@
mls r10, r2, r6, r1
vmovl.u8 q14, d30
- pld [r1, #32]
+ VERTPLD(r1, #32)
vmovl.u8 q15, d31
.if \max_r < 16 // approximate
ifcc adr \reg, 1f
@@ -97,45 +108,132 @@
ifcc .align 2
2: ifcc .word 1f-1b-8
- .irp rowclamp, 1, 0
- .set cc, \rowclamp
- .align 4
- .irp dreg, 6, 5, 4, 3, 2, 1, 0 ; .irp lane, 3, 2, 1, 0
- .set i, \dreg * 4 + \lane
- .if 0 < i && i <= \max_r
- .if \rowclamp
- vld1.8 {d20,d21}, [r10]
- vld1.8 {d22,d23}, [r11]
- cmp r6, #i
- .else
+ /* This version of the vertical fetch loop body is used away from the edges
+ * of the source image. The pointers start at the top and bottom source rows
+ * and work their way towards the centre on each iteration. This way the
+ * number of taps used can be controlled by jumping directly into the middle
+ * of the loop and running to completion.
+ * If the loop body changes size then the code which calculates the address of
+ * the initial iteration must be updated accordingly.
+ */
+ .macro vertfetch_noclamp i, dreg
+ .if 0 < \i && \i <= \max_r
vld1.8 {d20,d21}, [r10], r2
vld1.8 {d22,d23}, [r11]
sub r11, r11, r2
- .endif
vswp d21, d22
- pld [r10, #32]
+ VERTPLD(r10, #32)
vaddl.u8 q10, d20, d21
- ifcc addhs r10, r10, r2
vaddl.u8 q11, d22, d23
- ifcc cmp r7, #i
- vmlal.u16 q12, d20, d\dreg[\lane]
- pld [r11, #32]
- vmlal.u16 q13, d21, d\dreg[\lane]
- ifcc subhs r11, r11, r2
- vmlal.u16 q14, d22, d\dreg[\lane]
- ifcc nop
- vmlal.u16 q15, d23, d\dreg[\lane]
- .endif
- .endr ; .endr
- .if \rowclamp == 1
- 1: \labelc :
- b 2f
- .else
- 2: \labelnc :
+ vmlal.u16 q12, d20, \dreg
+ VERTPLD(r11, #32)
+ vmlal.u16 q13, d21, \dreg
+ vmlal.u16 q14, d22, \dreg
+ vmlal.u16 q15, d23, \dreg
.endif
- .endr
+ .endm
- vqrshrn.u32 d20, q12, #16 - FRACTION_BITS
+ /* This version of the vertical fetch loop body is used near the edges of the
+ * source image, where one or both of the accesses may start with a clamped
+ * value, and the row addresses only begin to change after some number of
+ * iterations before the end.
+ * If the loop body changes size then the code which calculates the address of
+ * the initial iteration must be updated accordingly.
+ */
+ .macro vertfetch_clamped i, dreg // one unrolled tap of the clamped vertical fetch: \i is the tap index, \dreg the coefficient lane
+ .if 0 < \i && \i <= \max_r // emits nothing for tap 0 or for taps above \max_r
+ vld1.8 {d20,d21}, [r10] // load 16 bytes through each row pointer, with no post-increment
+ vld1.8 {d22,d23}, [r11]
+ cmp r6, #\i // flags consumed by the addhs below
+ vswp d21, d22 // pair up the low halves and the high halves of the two rows
+ VERTPLD(r10, #32)
+ vaddl.u8 q10, d20, d21 // add corresponding bytes of the two rows, widening to 16 bits
+ addhs r10, r10, r2 // advance r10 by r2 only when r6 >= \i (unsigned)
+ vaddl.u8 q11, d22, d23
+ cmp r7, #\i // flags consumed by the subhs below
+ vmlal.u16 q12, d20, \dreg // accumulate row sum times coefficient into q12-q15
+ VERTPLD(r11, #32)
+ vmlal.u16 q13, d21, \dreg
+ subhs r11, r11, r2 // step r11 back by r2 only when r7 >= \i (unsigned)
+ vmlal.u16 q14, d22, \dreg
+ nop // NOTE(review): presumably padding that keeps the body size fixed for the computed entry point - confirm
+ vmlal.u16 q15, d23, \dreg
+ .endif
+ .endm
+
+ /* Entry into this unrolled loop is computed as a negative index from
+ * \labelc at the end of the block.
+ */
+ .align 4
+ vertfetch_clamped 27, d6[3]
+ vertfetch_clamped 26, d6[2]
+ vertfetch_clamped 25, d6[1]
+ vertfetch_clamped 24, d6[0]
+ vertfetch_clamped 23, d5[3]
+ vertfetch_clamped 22, d5[2]
+ vertfetch_clamped 21, d5[1]
+ vertfetch_clamped 20, d5[0]
+ vertfetch_clamped 19, d4[3]
+ vertfetch_clamped 18, d4[2]
+ vertfetch_clamped 17, d4[1]
+ vertfetch_clamped 16, d4[0]
+ vertfetch_clamped 15, d3[3]
+ vertfetch_clamped 14, d3[2]
+ vertfetch_clamped 13, d3[1]
+ vertfetch_clamped 12, d3[0]
+ vertfetch_clamped 11, d2[3]
+ vertfetch_clamped 10, d2[2]
+ vertfetch_clamped 9, d2[1]
+ vertfetch_clamped 8, d2[0]
+ vertfetch_clamped 7, d1[3]
+ vertfetch_clamped 6, d1[2]
+ vertfetch_clamped 5, d1[1]
+ vertfetch_clamped 4, d1[0]
+ vertfetch_clamped 3, d0[3]
+ vertfetch_clamped 2, d0[2]
+ vertfetch_clamped 1, d0[1]
+ vertfetch_clamped 0, d0[0]
+ 1:
+ \labelc : b 2f /* done with clamped loop, skip over non-clamped loop */
+
+ /* Entry into this unrolled loop is computed as a negative index from
+ * \labelnc at the end of the block.
+ */
+ .align 4
+ vertfetch_noclamp 27, d6[3]
+ vertfetch_noclamp 26, d6[2]
+ vertfetch_noclamp 25, d6[1]
+ vertfetch_noclamp 24, d6[0]
+ vertfetch_noclamp 23, d5[3]
+ vertfetch_noclamp 22, d5[2]
+ vertfetch_noclamp 21, d5[1]
+ vertfetch_noclamp 20, d5[0]
+ vertfetch_noclamp 19, d4[3]
+ vertfetch_noclamp 18, d4[2]
+ vertfetch_noclamp 17, d4[1]
+ vertfetch_noclamp 16, d4[0]
+ vertfetch_noclamp 15, d3[3]
+ vertfetch_noclamp 14, d3[2]
+ vertfetch_noclamp 13, d3[1]
+ vertfetch_noclamp 12, d3[0]
+ vertfetch_noclamp 11, d2[3]
+ vertfetch_noclamp 10, d2[2]
+ vertfetch_noclamp 9, d2[1]
+ vertfetch_noclamp 8, d2[0]
+ vertfetch_noclamp 7, d1[3]
+ vertfetch_noclamp 6, d1[2]
+ vertfetch_noclamp 5, d1[1]
+ vertfetch_noclamp 4, d1[0]
+ vertfetch_noclamp 3, d0[3]
+ vertfetch_noclamp 2, d0[2]
+ vertfetch_noclamp 1, d0[1]
+ vertfetch_noclamp 0, d0[0]
+ \labelnc :
+
+ .purgem vertfetch_clamped
+ .purgem vertfetch_noclamp
+
+ 2: vqrshrn.u32 d20, q12, #16 - FRACTION_BITS
vqrshrn.u32 d21, q13, #16 - FRACTION_BITS
vqrshrn.u32 d22, q14, #16 - FRACTION_BITS
vqrshrn.u32 d23, q15, #16 - FRACTION_BITS
diff --git a/driver/rsdRuntimeStubs.cpp b/driver/rsdRuntimeStubs.cpp
index f28c946..718d611 100644
--- a/driver/rsdRuntimeStubs.cpp
+++ b/driver/rsdRuntimeStubs.cpp
@@ -433,13 +433,40 @@
//////////////////////////////////////////////////////////////////////////////
// ForEach routines
//////////////////////////////////////////////////////////////////////////////
+void rsForEachInternal(int slot,              // kernel slot, forwarded to rsrForEach
+                       rs_script_call *call,  // launch options, forwarded as RsScriptCall*
+                       int hasOutput,         // non-zero: one more rs_allocation follows the inputs
+                       int numIn,             // number of input rs_allocation varargs, 0..100
+                       ...) {
+    Context *rsc = RsdCpuReference::getTlsContext();
+    Script *s = const_cast<Script*>(RsdCpuReference::getTlsScript());
+    if (numIn < 0 || numIn > 100) {  // a negative count would wrap when passed on as uint32_t
+        ALOGE("rsForEachInternal: too many inputs to a kernel.");
+        return;
+    }
+    Allocation* inputs[100];
+    Allocation* out = nullptr;
+    va_list argp;
+    va_start(argp, numIn);
+    for (int i = 0; i < numIn; i++) {  // inputs come first in the variadic list
+        ::rs_allocation alloc = va_arg(argp, ::rs_allocation);
+        inputs[i] = (Allocation*)alloc.p;
+    }
+    if (hasOutput) {  // the optional output allocation is the last variadic argument
+        ::rs_allocation outAlloc = va_arg(argp, ::rs_allocation);
+        out = (Allocation*)outAlloc.p;
+    }
+    va_end(argp);
+    rsrForEach(rsc, s, slot, numIn, numIn > 0 ? inputs : nullptr, out, nullptr, 0, (RsScriptCall*)call);
+}
+
void __attribute__((overloadable)) rsForEach(::rs_script script,
::rs_allocation in,
::rs_allocation out,
const void *usr,
const rs_script_call *call) {
Context *rsc = RsdCpuReference::getTlsContext();
- rsrForEach(rsc, (Script *)script.p, (Allocation *)in.p,
+ rsrForEach(rsc, (Script *)script.p, 0, 1, (Allocation **)&in.p,
(Allocation *)out.p, usr, 0, (RsScriptCall *)call);
}
@@ -448,7 +475,7 @@
::rs_allocation out,
const void *usr) {
Context *rsc = RsdCpuReference::getTlsContext();
- rsrForEach(rsc, (Script *)script.p, (Allocation *)in.p, (Allocation *)out.p,
+ rsrForEach(rsc, (Script *)script.p, 0, 1, (Allocation **)&in.p, (Allocation *)out.p,
usr, 0, nullptr);
}
@@ -456,7 +483,7 @@
::rs_allocation in,
::rs_allocation out) {
Context *rsc = RsdCpuReference::getTlsContext();
- rsrForEach(rsc, (Script *)script.p, (Allocation *)in.p, (Allocation *)out.p,
+ rsrForEach(rsc, (Script *)script.p, 0, 1, (Allocation **)&in.p, (Allocation *)out.p,
nullptr, 0, nullptr);
}
@@ -468,7 +495,7 @@
const void *usr,
uint32_t usrLen) {
Context *rsc = RsdCpuReference::getTlsContext();
- rsrForEach(rsc, (Script *)script.p, (Allocation *)in.p, (Allocation *)out.p,
+ rsrForEach(rsc, (Script *)script.p, 0, 1, (Allocation **)&in.p, (Allocation *)out.p,
usr, usrLen, nullptr);
}
@@ -479,7 +506,7 @@
uint32_t usrLen,
const rs_script_call *call) {
Context *rsc = RsdCpuReference::getTlsContext();
- rsrForEach(rsc, (Script *)script.p, (Allocation *)in.p, (Allocation *)out.p,
+ rsrForEach(rsc, (Script *)script.p, 0, 1, (Allocation **)&in.p, (Allocation *)out.p,
usr, usrLen, (RsScriptCall *)call);
}
#endif
diff --git a/java/tests/RsTest/src/com/android/rs/test/RSTestCore.java b/java/tests/RsTest/src/com/android/rs/test/RSTestCore.java
index 3294aed..ecd661e 100644
--- a/java/tests/RsTest/src/com/android/rs/test/RSTestCore.java
+++ b/java/tests/RsTest/src/com/android/rs/test/RSTestCore.java
@@ -85,6 +85,7 @@
unitTests.add(new UT_foreach(this, mRes, mCtx));
unitTests.add(new UT_foreach_bounds(this, mRes, mCtx));
unitTests.add(new UT_noroot(this, mRes, mCtx));
+ unitTests.add(new UT_single_source_script(this, mRes, mCtx));
unitTests.add(new UT_script_group2_pointwise(this, mRes, mCtx));
unitTests.add(new UT_script_group2_gatherscatter(this, mRes, mCtx));
unitTests.add(new UT_script_group2_nochain(this, mRes, mCtx));
diff --git a/java/tests/RsTest/src/com/android/rs/test/UT_single_source_script.java b/java/tests/RsTest/src/com/android/rs/test/UT_single_source_script.java
new file mode 100644
index 0000000..5765d4e
--- /dev/null
+++ b/java/tests/RsTest/src/com/android/rs/test/UT_single_source_script.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.rs.test;
+
+import android.content.Context;
+import android.content.res.Resources;
+import android.renderscript.*;
+
+public class UT_single_source_script extends UnitTest { // unit test that drives single_source_script.rs through its invokable entrypoint
+ private Resources mRes;
+ private Allocation testAllocation1, testAllocation2; // the two allocations handed to entrypoint(); created in initializeGlobals()
+
+ protected UT_single_source_script(RSTestCore rstc, Resources res, Context ctx) {
+ super(rstc, "SingleSourceScript", ctx); // "SingleSourceScript" is the name passed to the UnitTest base class
+ mRes = res;
+ }
+
+ private void initializeGlobals(RenderScript RS, ScriptC_single_source_script s) { // sets the script's dimX/dimY and creates two X-by-Y I32 allocations
+ Type.Builder i32TypeBuilder = new Type.Builder(RS, Element.I32(RS));
+ int X = 1024;
+ int Y = 768;
+ s.set_dimX(X); // the script loops over dimX/dimY, so these must match the allocation type built below
+ s.set_dimY(Y);
+ i32TypeBuilder.setX(X).setY(Y);
+ testAllocation1 = Allocation.createTyped(RS, i32TypeBuilder.create());
+ testAllocation2 = Allocation.createTyped(RS, i32TypeBuilder.create());
+ }
+
+ public void run() {
+ RenderScript pRS = RenderScript.create(mCtx);
+ ScriptC_single_source_script s = new ScriptC_single_source_script(pRS);
+ pRS.setMessageHandler(mRsMessage);
+ initializeGlobals(pRS, s);
+
+ s.invoke_entrypoint(testAllocation1, testAllocation2); // the script fills the first allocation, launches its kernels and validates the second
+
+ pRS.finish();
+ waitForMessage(); // presumably blocks until the script's pass/fail message arrives via mRsMessage - confirm in UnitTest
+ pRS.destroy();
+ }
+}
diff --git a/java/tests/RsTest/src/com/android/rs/test/single_source_script.rs b/java/tests/RsTest/src/com/android/rs/test/single_source_script.rs
new file mode 100644
index 0000000..e34dd5b
--- /dev/null
+++ b/java/tests/RsTest/src/com/android/rs/test/single_source_script.rs
@@ -0,0 +1,70 @@
+#include "shared.rsh"
+
+int dimX;
+int dimY;
+
+int __attribute__((kernel)) foo(int a) { // one-input kernel: doubles its input element
+ return a * 2;
+}
+
+int __attribute__((kernel)) goo(int a, int b) { // two-input kernel: sums the corresponding elements of its inputs
+ return a + b;
+}
+
+static void validate(rs_allocation out) { // compares every element of out with the value the launch sequence in entrypoint() should produce, then reports pass/fail
+ bool failed = false;
+
+ int i, j;
+
+ for (j = 0; j < dimY; j++) {
+ for (i = 0; i < dimX; i++) {
+ const int actual = rsGetElementAt_int(out, i, j);
+ int expected = (i + j * dimX) * 4; // seed value (i + j * dimX), doubled by each of the two unrestricted foo launches
+ if (j < dimY / 2) { // these rows are also covered by the third foo launch (yStart = 0, yEnd = dimY / 2)
+ expected *= 2;
+ }
+ expected += (i + j * dimX); // the final goo launch adds the seed value from the input allocation
+ if (actual != expected) {
+ failed = true; // no early exit: every mismatching element is logged
+ rsDebug("row ", j);
+ rsDebug("column ", i);
+ rsDebug("expects ", expected);
+ rsDebug("got ", actual);
+ }
+ }
+ }
+
+ if (failed) { // log the verdict ...
+ rsDebug("FAILED", 0);
+ } else {
+ rsDebug("PASSED", 0);
+ }
+
+ if (failed) { // ... then send the verdict to the client
+ rsSendToClientBlocking(RS_MSG_TEST_FAILED);
+ } else {
+ rsSendToClientBlocking(RS_MSG_TEST_PASSED);
+ }
+}
+
+void entrypoint(rs_allocation in, rs_allocation out) { // seeds in, launches foo and goo with rsForEach / rsForEachWithOptions, then validates out
+ int i, j;
+ for (i = 0; i < dimX; i++) {
+ for (j = 0; j < dimY; j++) {
+ rsSetElementAt_int(in, j * dimX + i, i, j); // element (i, j) holds the value j * dimX + i
+ }
+ }
+
+ rsForEach(foo, in, out); // out = 2 * in
+ rsForEach(foo, out, out); // in place: out = 4 * in
+ rs_script_call_t opts = {0};
+ opts.xStart = 0;
+ opts.xEnd = dimX;
+ opts.yStart = 0;
+ opts.yEnd = dimY / 2; // limit the next launch to the rows that validate() treats as j < dimY / 2
+ rsForEachWithOptions(foo, &opts, out, out); // doubles out again, only inside the region given by opts
+
+ rsForEach(goo, in, out, out); // out = in + out
+
+ validate(out);
+}
diff --git a/java/tests/RsTest/src/com/android/rs/test/test_root.rs b/java/tests/RsTest/src/com/android/rs/test/test_root.rs
deleted file mode 100644
index 6dc83ba..0000000
--- a/java/tests/RsTest/src/com/android/rs/test/test_root.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-// Fountain test script
-#pragma version(1)
-
-#pragma rs java_package_name(com.android.rs.test)
-
-#pragma stateFragment(parent)
-
-#include "rs_graphics.rsh"
-
-
-typedef struct TestResult {
- rs_allocation name;
- bool pass;
- float score;
-} TestResult_t;
-TestResult_t *results;
-
-int root() {
-
- return 0;
-}
-
-
diff --git a/rsRuntime.h b/rsRuntime.h
index 5a05883..9bc05b3 100644
--- a/rsRuntime.h
+++ b/rsRuntime.h
@@ -155,7 +155,9 @@
void rsrForEach(Context *, Script *target,
- Allocation *in,
+ uint32_t slot,
+ uint32_t numInputs,
+ Allocation **in,
Allocation *out,
const void *usr,
uint32_t usrBytes,
diff --git a/rsScriptC_Lib.cpp b/rsScriptC_Lib.cpp
index c404bde..a411e34 100644
--- a/rsScriptC_Lib.cpp
+++ b/rsScriptC_Lib.cpp
@@ -236,20 +236,12 @@
void rsrForEach(Context *rsc,
Script *target,
- Allocation *in, Allocation *out,
+ uint32_t slot,
+ uint32_t numInputs,
+ Allocation **in, Allocation *out,
const void *usr, uint32_t usrBytes,
const RsScriptCall *call) {
-
- if (in == nullptr) {
- target->runForEach(rsc, /* root slot */ 0, nullptr, 0, out, usr,
- usrBytes, call);
-
- } else {
- const Allocation *ins[1] = {in};
- target->runForEach(rsc, /* root slot */ 0, ins,
- sizeof(ins) / sizeof(RsAllocation), out, usr,
- usrBytes, call);
- }
+ target->runForEach(rsc, slot, (const Allocation**)in, numInputs, out, usr, usrBytes, call);
}
void rsrAllocationSyncAll(Context *rsc, Allocation *a, RsAllocationUsageType usage) {
diff --git a/scriptc/rs_for_each.rsh b/scriptc/rs_for_each.rsh
index 9771d09..6a42b41 100644
--- a/scriptc/rs_for_each.rsh
+++ b/scriptc/rs_for_each.rsh
@@ -91,6 +91,16 @@
} rs_script_call_t;
/*
+ * rs_kernel: Handle to a kernel function
+ *
+ * An opaque type for a function that is defined with the kernel attribute. A value
+ * of this type can be used in a rsForEach call to launch a kernel.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 4294967295) && (defined(RS_DECLARE_EXPIRED_APIS) || RS_VERSION <= 4294967295))
+typedef void* rs_kernel;
+#endif
+
+/*
* rsForEach: Invoke the root kernel of a script
*
* Invoke the kernel named "root" of the specified script. Like other kernels, this root()
@@ -119,6 +129,8 @@
* usrData: User defined data to pass to the script. May be NULL.
* sc: Extra control information used to select a sub-region of the allocation to be processed or suggest a walking strategy. May be NULL.
* usrDataLen: Size of the userData structure. This will be used to perform a shallow copy of the data if necessary.
+ * kernel: Function designator to a function that is defined with the kernel attribute.
+ * ...: Input and output allocations
*/
#if !defined(RS_VERSION) || (RS_VERSION <= 13)
extern void __attribute__((overloadable))
@@ -143,11 +155,31 @@
size_t usrDataLen);
#endif
-#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+#if (defined(RS_VERSION) && (RS_VERSION >= 14) && (RS_VERSION <= 23))
extern void __attribute__((overloadable))
rsForEach(rs_script script, rs_allocation input, rs_allocation output);
#endif
+#if (defined(RS_VERSION) && (RS_VERSION >= 4294967295) && (defined(RS_DECLARE_EXPIRED_APIS) || RS_VERSION <= 4294967295))
+extern void
+ rsForEach(rs_kernel kernel, ...);
+#endif
+
+/*
+ * rsForEachWithOptions: TBD
+ *
+ * TBD
+ *
+ * Parameters:
+ * kernel: Function designator to a function that is defined with the kernel attribute.
+ * options: Launch options
+ * ...: Input and output allocations
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 4294967295) && (defined(RS_DECLARE_EXPIRED_APIS) || RS_VERSION <= 4294967295))
+extern void
+ rsForEachWithOptions(rs_kernel kernel, rs_script_call_t* options, ...);
+#endif
+
/*
* rsGetArray0: Index in the Array0 dimension for the specified kernel context
*